/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

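/*
 * Map the access mode bits of the VFS open flags to the SMB desired access
 * mask for the open request. If the mode is not one of the three valid
 * O_ACCMODE values, fall back to a conservative read/write/attribute set.
 */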
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause an unnecessary access-denied error on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

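/*
 * Translate VFS open flags into the SMB_O_* flags used by the CIFS POSIX
 * create call (Unix extensions). Flags with no protocol equivalent are
 * dropped.
 */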
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

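/*
 * Choose the CIFS create disposition matching the create/truncate semantics
 * requested by the open flags (see the mapping table in cifs_nt_open()).
 */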
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

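/*
 * Open a file via the CIFS POSIX create call (Unix extensions). On success
 * the netfid and oplock are returned and, when @pinode is non-NULL, the
 * inode is created or refreshed from the attributes the server sent back.
 */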
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

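/*
 * Open a file the traditional (non-POSIX) way: convert the open flags to a
 * desired access mask and create disposition, issue the protocol open, then
 * refresh the inode metadata from the server.
 */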
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new one as FILE_SUPERSEDE does
 *      (FILE_SUPERSEDE uses the attributes / metadata passed in on
 *      the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

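/*
 * Return true if any open instance of this inode holds at least one cached
 * mandatory byte-range lock.
 */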
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

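/*
 * Allocate and initialize the per-open bookkeeping (cifsFileInfo plus its
 * lock list), link it into the inode and tcon open-file lists, and hand the
 * server-assigned fid and oplock to the protocol layer via set_fid().
 */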
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

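/*
 * Take an extra reference on the file private data under
 * cifs_file_list_lock.
 */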
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

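/*
 * VFS ->open() handler: attempt a POSIX open when the server supports the
 * Unix extensions, otherwise fall back to a regular NT-style open, then
 * build the cifsFileInfo for the new handle.
 */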
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode which we could not set earlier due
                 * to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

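/*
 * Reopen a file whose handle has been invalidated, e.g. after a reconnect.
 * When @can_flush is set, dirty pages are written out and the inode
 * metadata is refreshed before the handle is reused; cached byte-range
 * locks are then pushed back to the server via cifs_relock_file().
 */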
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means
         * we end up here and we can never tell if the caller already has
         * the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by CIFSSMBOpen and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally, we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data; and since we do not know if
         * we have data that would invalidate the current end of file on
         * the server, we can not go to the server to get the new inode
         * info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

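/*
 * VFS ->release() handler for directories: abort any search still in
 * progress on the server and free the search buffers and private data.
 */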
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

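/*
 * Allocate and initialize a cifsLockInfo describing a byte-range lock owned
 * by the current thread group.
 */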
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

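/* Wake up every lock request that is blocked waiting on the given lock. */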
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - lock op, 1 - read op, 2 - write op (see defines above) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

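/*
 * Scan the lock lists of every open instance of the inode for a byte-range
 * lock conflicting with the given range; the first conflict found is
 * returned through @conf_lock.
 */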
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * contact the server, 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

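/* Add a byte-range lock to the file's cached lock list. */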
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send it to the server;
 * 2) 1, if no locks prevent us but we need to send the request to the
 *    server;
 * 3) -EACCES, if there is a lock that prevents us and @wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with its
 * properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * contact the server, 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send it to the server;
 * 2) 1, if we need to send the request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

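/*
 * Push all cached mandatory byte-range locks for this open file to the
 * server, packing as many LOCKING_ANDX ranges per request as the server's
 * maximum buffer size allows.
 */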
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

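/*
 * Push all cached POSIX byte-range locks for this inode to the server:
 * count the FL_POSIX locks, preallocate lock_to_push entries (the list
 * cannot grow while lock_sem is held), then replay each lock via
 * CIFSSMBPosixLock.
 */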
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        spin_unlock(&inode->i_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&inode->i_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

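/*
 * Decode a struct file_lock into the protocol lock type plus the lock,
 * unlock and wait flags used by the CIFS locking calls, logging each
 * recognized flag for debugging.
 */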
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

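/*
 * Handle F_GETLK: test for a conflicting lock, first against the locally
 * cached locks and then, if the cache is not authoritative, by probing the
 * server with a temporary lock/unlock pair.
 */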
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

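/* Move every entry on @source to the tail of @dest. */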
void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

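/* Free every lock on @llist, waking any waiters first. */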
void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

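/*
 * Unlock every cached lock that falls inside the range described by
 * @flock, batching the unlock ranges to the server and restoring the locks
 * to the file's list if a server request fails.
 */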
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
1445                                        /*
1446                                         * The unlock range request succeeded -
1447                                         * free the tmp list.
1448                                         */
1449                                        cifs_free_llist(&tmp_llist);
1450                                cur = buf;
1451                                num = 0;
1452                        } else
1453                                cur++;
1454                }
1455                if (num) {
1456                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1457                                               types[i], num, 0, buf);
1458                        if (stored_rc) {
1459                                cifs_move_llist(&tmp_llist,
1460                                                &cfile->llist->locks);
1461                                rc = stored_rc;
1462                        } else
1463                                cifs_free_llist(&tmp_llist);
1464                }
1465        }
1466
1467        up_write(&cinode->lock_sem);
1468        kfree(buf);
1469        return rc;
1470}
1471
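    /*
     * Set or clear a byte-range lock. POSIX-capable mounts go through
     * CIFSSMBPosixLock(); otherwise the lock is cached locally when
     * possible or sent to the server as a mandatory byte-range lock.
     */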
1472static int
1473cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1474           bool wait_flag, bool posix_lck, int lock, int unlock,
1475           unsigned int xid)
1476{
1477        int rc = 0;
1478        __u64 length = 1 + flock->fl_end - flock->fl_start;
1479        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1480        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1481        struct TCP_Server_Info *server = tcon->ses->server;
1482        struct inode *inode = cfile->dentry->d_inode;
1483
1484        if (posix_lck) {
1485                int posix_lock_type;
1486
1487                rc = cifs_posix_lock_set(file, flock);
1488                if (!rc || rc < 0)
1489                        return rc;
1490
1491                if (type & server->vals->shared_lock_type)
1492                        posix_lock_type = CIFS_RDLCK;
1493                else
1494                        posix_lock_type = CIFS_WRLCK;
1495
1496                if (unlock == 1)
1497                        posix_lock_type = CIFS_UNLCK;
1498
1499                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1500                                      current->tgid, flock->fl_start, length,
1501                                      NULL, posix_lock_type, wait_flag);
1502                goto out;
1503        }
1504
1505        if (lock) {
1506                struct cifsLockInfo *lock;
1507
1508                lock = cifs_lock_init(flock->fl_start, length, type);
1509                if (!lock)
1510                        return -ENOMEM;
1511
1512                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1513                if (rc < 0) {
1514                        kfree(lock);
1515                        return rc;
1516                }
1517                if (!rc)
1518                        goto out;
1519
1520                /*
1521                 * A Windows 7 server can delay breaking a lease from read to
1522                 * None if we set a byte-range lock on a file - break it
1523                 * explicitly before sending the lock to the server to be sure
1524                 * the next read won't conflict with non-overlapping locks due
1525                 * to whole-page reads.
1526                 */
1527                if (!CIFS_I(inode)->clientCanCacheAll &&
1528                                        CIFS_I(inode)->clientCanCacheRead) {
1529                        cifs_invalidate_mapping(inode);
1530                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1531                                 inode);
1532                        CIFS_I(inode)->clientCanCacheRead = false;
1533                }
1534
1535                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1536                                            type, 1, 0, wait_flag);
1537                if (rc) {
1538                        kfree(lock);
1539                        return rc;
1540                }
1541
1542                cifs_lock_add(cfile, lock);
1543        } else if (unlock)
1544                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1545
1546out:
1547        if (flock->fl_flags & FL_POSIX)
1548                posix_lock_file_wait(file, flock);
1549        return rc;
1550}
1551
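    /*
     * The VFS ->lock entry point: decode the file_lock request, decide
     * between POSIX and mandatory lock semantics, and dispatch to
     * cifs_getlk() or cifs_setlk().
     */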
1552int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1553{
1554        int rc, xid;
1555        int lock = 0, unlock = 0;
1556        bool wait_flag = false;
1557        bool posix_lck = false;
1558        struct cifs_sb_info *cifs_sb;
1559        struct cifs_tcon *tcon;
1560        struct cifsInodeInfo *cinode;
1561        struct cifsFileInfo *cfile;
1562        __u16 netfid;
1563        __u32 type;
1564
1565        rc = -EACCES;
1566        xid = get_xid();
1567
1568        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1569                 cmd, flock->fl_flags, flock->fl_type,
1570                 flock->fl_start, flock->fl_end);
1571
1572        cfile = (struct cifsFileInfo *)file->private_data;
1573        tcon = tlink_tcon(cfile->tlink);
1574
1575        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1576                        tcon->ses->server);
1577
1578        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1579        netfid = cfile->fid.netfid;
1580        cinode = CIFS_I(file_inode(file));
1581
1582        if (cap_unix(tcon->ses) &&
1583            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1584            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1585                posix_lck = true;
1586        /*
1587         * BB add code here to normalize offset and length to account for
1588         * negative length, which we cannot accept over the wire.
1589         */
1590        if (IS_GETLK(cmd)) {
1591                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1592                free_xid(xid);
1593                return rc;
1594        }
1595
1596        if (!lock && !unlock) {
1597                /*
1598                 * if this is neither a lock nor an unlock request, there is
1599                 * nothing to do since we do not know how to handle it
1600                 */
1601                free_xid(xid);
1602                return -EOPNOTSUPP;
1603        }
1604
1605        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1606                        xid);
1607        free_xid(xid);
1608        return rc;
1609}
1610
1611/*
1612 * Update the file size (if needed) after a write. Must be called with
1613 * the inode->i_lock held.
1614 */
1615void
1616cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1617                      unsigned int bytes_written)
1618{
1619        loff_t end_of_write = offset + bytes_written;
1620
1621        if (end_of_write > cifsi->server_eof)
1622                cifsi->server_eof = end_of_write;
1623}
1624
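    /*
     * Synchronously write data from a kernel buffer to the server,
     * splitting the request into wsize-sized chunks and reopening the
     * file handle if it was invalidated by a reconnect. The cached
     * server EOF and i_size are updated as bytes are written.
     */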
1625static ssize_t
1626cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1627           size_t write_size, loff_t *offset)
1628{
1629        int rc = 0;
1630        unsigned int bytes_written = 0;
1631        unsigned int total_written;
1632        struct cifs_sb_info *cifs_sb;
1633        struct cifs_tcon *tcon;
1634        struct TCP_Server_Info *server;
1635        unsigned int xid;
1636        struct dentry *dentry = open_file->dentry;
1637        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1638        struct cifs_io_parms io_parms;
1639
1640        cifs_sb = CIFS_SB(dentry->d_sb);
1641
1642        cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1643                 write_size, *offset, dentry->d_name.name);
1644
1645        tcon = tlink_tcon(open_file->tlink);
1646        server = tcon->ses->server;
1647
1648        if (!server->ops->sync_write)
1649                return -ENOSYS;
1650
1651        xid = get_xid();
1652
1653        for (total_written = 0; write_size > total_written;
1654             total_written += bytes_written) {
1655                rc = -EAGAIN;
1656                while (rc == -EAGAIN) {
1657                        struct kvec iov[2];
1658                        unsigned int len;
1659
1660                        if (open_file->invalidHandle) {
1661                                /* we could deadlock if we called
1662                                   filemap_fdatawait from here, so tell
1663                                   cifs_reopen_file not to flush data
1664                                   to the server now */
1665                                rc = cifs_reopen_file(open_file, false);
1666                                if (rc != 0)
1667                                        break;
1668                        }
1669
1670                        len = min((size_t)cifs_sb->wsize,
1671                                  write_size - total_written);
1672                        /* iov[0] is reserved for smb header */
1673                        iov[1].iov_base = (char *)write_data + total_written;
1674                        iov[1].iov_len = len;
1675                        io_parms.pid = pid;
1676                        io_parms.tcon = tcon;
1677                        io_parms.offset = *offset;
1678                        io_parms.length = len;
1679                        rc = server->ops->sync_write(xid, open_file, &io_parms,
1680                                                     &bytes_written, iov, 1);
1681                }
1682                if (rc || (bytes_written == 0)) {
1683                        if (total_written)
1684                                break;
1685                        else {
1686                                free_xid(xid);
1687                                return rc;
1688                        }
1689                } else {
1690                        spin_lock(&dentry->d_inode->i_lock);
1691                        cifs_update_eof(cifsi, *offset, bytes_written);
1692                        spin_unlock(&dentry->d_inode->i_lock);
1693                        *offset += bytes_written;
1694                }
1695        }
1696
1697        cifs_stats_bytes_written(tcon, total_written);
1698
1699        if (total_written > 0) {
1700                spin_lock(&dentry->d_inode->i_lock);
1701                if (*offset > dentry->d_inode->i_size)
1702                        i_size_write(dentry->d_inode, *offset);
1703                spin_unlock(&dentry->d_inode->i_lock);
1704        }
1705        mark_inode_dirty_sync(dentry->d_inode);
1706        free_xid(xid);
1707        return total_written;
1708}
1709
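    /*
     * Find an open handle for this inode with read access, optionally
     * restricted to handles owned by the current fsuid. Returns a
     * referenced cifsFileInfo, or NULL if none is found.
     */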
1710struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1711                                        bool fsuid_only)
1712{
1713        struct cifsFileInfo *open_file = NULL;
1714        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1715
1716        /* only filter by fsuid on multiuser mounts */
1717        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1718                fsuid_only = false;
1719
1720        spin_lock(&cifs_file_list_lock);
1721        /* we could simply return the first list entry since write-only
1722           entries are always at the end of the list, but the first entry
1723           might have a close pending, so we go through the whole list */
1724        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1725                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1726                        continue;
1727                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1728                        if (!open_file->invalidHandle) {
1729                                /* found a good file */
1730                                /* lock it so it will not be closed on us */
1731                                cifsFileInfo_get_locked(open_file);
1732                                spin_unlock(&cifs_file_list_lock);
1733                                return open_file;
1734                        } /* else might as well continue, and look for
1735                             another, or simply have the caller reopen it
1736                             again rather than trying to fix this handle */
1737                } else /* write only file */
1738                        break; /* write only files are last so must be done */
1739        }
1740        spin_unlock(&cifs_file_list_lock);
1741        return NULL;
1742}
1743
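    /*
     * Find a writable open handle for this inode, preferring handles
     * owned by the calling process. As a last resort an invalidated
     * handle is reopened; if the reopen fails, that handle is moved to
     * the tail of the list and the search retried, up to MAX_REOPEN_ATT
     * times.
     */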
1744struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1745                                        bool fsuid_only)
1746{
1747        struct cifsFileInfo *open_file, *inv_file = NULL;
1748        struct cifs_sb_info *cifs_sb;
1749        bool any_available = false;
1750        int rc;
1751        unsigned int refind = 0;
1752
1753        /* Having a null inode here (because mapping->host was set to zero by
1754        the VFS or MM) should not happen, but we had reports of an oops (due
1755        to it being zero) during stress test cases, so we need to check for it */
1756
1757        if (cifs_inode == NULL) {
1758                cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
1759                dump_stack();
1760                return NULL;
1761        }
1762
1763        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1764
1765        /* only filter by fsuid on multiuser mounts */
1766        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1767                fsuid_only = false;
1768
1769        spin_lock(&cifs_file_list_lock);
1770refind_writable:
1771        if (refind > MAX_REOPEN_ATT) {
1772                spin_unlock(&cifs_file_list_lock);
1773                return NULL;
1774        }
1775        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1776                if (!any_available && open_file->pid != current->tgid)
1777                        continue;
1778                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1779                        continue;
1780                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1781                        if (!open_file->invalidHandle) {
1782                                /* found a good writable file */
1783                                cifsFileInfo_get_locked(open_file);
1784                                spin_unlock(&cifs_file_list_lock);
1785                                return open_file;
1786                        } else {
1787                                if (!inv_file)
1788                                        inv_file = open_file;
1789                        }
1790                }
1791        }
1792        /* couldn't find a usable FH with the same pid, try any available */
1793        if (!any_available) {
1794                any_available = true;
1795                goto refind_writable;
1796        }
1797
1798        if (inv_file) {
1799                any_available = false;
1800                cifsFileInfo_get_locked(inv_file);
1801        }
1802
1803        spin_unlock(&cifs_file_list_lock);
1804
1805        if (inv_file) {
1806                rc = cifs_reopen_file(inv_file, false);
1807                if (!rc)
1808                        return inv_file;
1809                else {
1810                        spin_lock(&cifs_file_list_lock);
1811                        list_move_tail(&inv_file->flist,
1812                                        &cifs_inode->openFileList);
1813                        spin_unlock(&cifs_file_list_lock);
1814                        cifsFileInfo_put(inv_file);
1815                        spin_lock(&cifs_file_list_lock);
1816                        ++refind;
1817                        goto refind_writable;
1818                }
1819        }
1820
1821        return NULL;
1822}
1823
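    /*
     * Write the bytes [from, to) of a single page back to the server
     * through any writable handle for the inode, clamping the range so
     * the write never extends the file.
     */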
1824static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1825{
1826        struct address_space *mapping = page->mapping;
1827        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1828        char *write_data;
1829        int rc = -EFAULT;
1830        int bytes_written = 0;
1831        struct inode *inode;
1832        struct cifsFileInfo *open_file;
1833
1834        if (!mapping || !mapping->host)
1835                return -EFAULT;
1836
1837        inode = page->mapping->host;
1838
1839        offset += (loff_t)from;
1840        write_data = kmap(page);
1841        write_data += from;
1842
1843        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1844                kunmap(page);
1845                return -EIO;
1846        }
1847
1848        /* racing with truncate? */
1849        if (offset > mapping->host->i_size) {
1850                kunmap(page);
1851                return 0; /* don't care */
1852        }
1853
1854        /* check to make sure that we are not extending the file */
1855        if (mapping->host->i_size - offset < (loff_t)to)
1856                to = (unsigned)(mapping->host->i_size - offset);
1857
1858        open_file = find_writable_file(CIFS_I(mapping->host), false);
1859        if (open_file) {
1860                bytes_written = cifs_write(open_file, open_file->pid,
1861                                           write_data, to - from, &offset);
1862                cifsFileInfo_put(open_file);
1863                /* Does mm or vfs already set times? */
1864                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1865                if ((bytes_written > 0) && (offset))
1866                        rc = 0;
1867                else if (bytes_written < 0)
1868                        rc = bytes_written;
1869        } else {
1870                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1871                rc = -EIO;
1872        }
1873
1874        kunmap(page);
1875        return rc;
1876}
1877
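    /*
     * Writeback for dirty pages: gather runs of contiguous dirty pages
     * into cifs_writedata requests of up to wsize bytes and submit them
     * through the server's async_writev op. Falls back to
     * generic_writepages() when wsize is smaller than a page.
     */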
1878static int cifs_writepages(struct address_space *mapping,
1879                           struct writeback_control *wbc)
1880{
1881        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1882        bool done = false, scanned = false, range_whole = false;
1883        pgoff_t end, index;
1884        struct cifs_writedata *wdata;
1885        struct TCP_Server_Info *server;
1886        struct page *page;
1887        int rc = 0;
1888
1889        /*
1890         * If wsize is smaller than the page cache size, default to writing
1891         * one page at a time via cifs_writepage
1892         */
1893        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1894                return generic_writepages(mapping, wbc);
1895
1896        if (wbc->range_cyclic) {
1897                index = mapping->writeback_index; /* Start from prev offset */
1898                end = -1;
1899        } else {
1900                index = wbc->range_start >> PAGE_CACHE_SHIFT;
1901                end = wbc->range_end >> PAGE_CACHE_SHIFT;
1902                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1903                        range_whole = true;
1904                scanned = true;
1905        }
1906retry:
1907        while (!done && index <= end) {
1908                unsigned int i, nr_pages, found_pages;
1909                pgoff_t next = 0, tofind;
1910                struct page **pages;
1911
1912                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1913                                end - index) + 1;
1914
1915                wdata = cifs_writedata_alloc((unsigned int)tofind,
1916                                             cifs_writev_complete);
1917                if (!wdata) {
1918                        rc = -ENOMEM;
1919                        break;
1920                }
1921
1922                /*
1923                 * find_get_pages_tag seems to return a max of 256 on each
1924                 * iteration, so we must call it several times in order to
1925                 * fill the array or the wsize is effectively limited to
1926                 * 256 * PAGE_CACHE_SIZE.
1927                 */
1928                found_pages = 0;
1929                pages = wdata->pages;
1930                do {
1931                        nr_pages = find_get_pages_tag(mapping, &index,
1932                                                        PAGECACHE_TAG_DIRTY,
1933                                                        tofind, pages);
1934                        found_pages += nr_pages;
1935                        tofind -= nr_pages;
1936                        pages += nr_pages;
1937                } while (nr_pages && tofind && index <= end);
1938
1939                if (found_pages == 0) {
1940                        kref_put(&wdata->refcount, cifs_writedata_release);
1941                        break;
1942                }
1943
1944                nr_pages = 0;
1945                for (i = 0; i < found_pages; i++) {
1946                        page = wdata->pages[i];
1947                        /*
1948                         * At this point we hold neither mapping->tree_lock nor
1949                         * lock on the page itself: the page may be truncated or
1950                         * invalidated (changing page->mapping to NULL), or even
1951                         * swizzled back from swapper_space to tmpfs file
1952                         * mapping
1953                         */
1954
1955                        if (nr_pages == 0)
1956                                lock_page(page);
1957                        else if (!trylock_page(page))
1958                                break;
1959
1960                        if (unlikely(page->mapping != mapping)) {
1961                                unlock_page(page);
1962                                break;
1963                        }
1964
1965                        if (!wbc->range_cyclic && page->index > end) {
1966                                done = true;
1967                                unlock_page(page);
1968                                break;
1969                        }
1970
1971                        if (next && (page->index != next)) {
1972                                /* Not next consecutive page */
1973                                unlock_page(page);
1974                                break;
1975                        }
1976
1977                        if (wbc->sync_mode != WB_SYNC_NONE)
1978                                wait_on_page_writeback(page);
1979
1980                        if (PageWriteback(page) ||
1981                                        !clear_page_dirty_for_io(page)) {
1982                                unlock_page(page);
1983                                break;
1984                        }
1985
1986                        /*
1987                         * This actually clears the dirty bit in the radix tree.
1988                         * See cifs_writepage() for more commentary.
1989                         */
1990                        set_page_writeback(page);
1991
1992                        if (page_offset(page) >= i_size_read(mapping->host)) {
1993                                done = true;
1994                                unlock_page(page);
1995                                end_page_writeback(page);
1996                                break;
1997                        }
1998
1999                        wdata->pages[i] = page;
2000                        next = page->index + 1;
2001                        ++nr_pages;
2002                }
2003
2004                /* reset index to refind any pages skipped */
2005                if (nr_pages == 0)
2006                        index = wdata->pages[0]->index + 1;
2007
2008                /* put any pages we aren't going to use */
2009                for (i = nr_pages; i < found_pages; i++) {
2010                        page_cache_release(wdata->pages[i]);
2011                        wdata->pages[i] = NULL;
2012                }
2013
2014                /* nothing to write? */
2015                if (nr_pages == 0) {
2016                        kref_put(&wdata->refcount, cifs_writedata_release);
2017                        continue;
2018                }
2019
2020                wdata->sync_mode = wbc->sync_mode;
2021                wdata->nr_pages = nr_pages;
2022                wdata->offset = page_offset(wdata->pages[0]);
2023                wdata->pagesz = PAGE_CACHE_SIZE;
2024                wdata->tailsz =
2025                        min(i_size_read(mapping->host) -
2026                            page_offset(wdata->pages[nr_pages - 1]),
2027                            (loff_t)PAGE_CACHE_SIZE);
2028                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2029                                        wdata->tailsz;
2030
2031                do {
2032                        if (wdata->cfile != NULL)
2033                                cifsFileInfo_put(wdata->cfile);
2034                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2035                                                          false);
2036                        if (!wdata->cfile) {
2037                                cifs_dbg(VFS, "No writable handles for inode\n");
2038                                rc = -EBADF;
2039                                break;
2040                        }
2041                        wdata->pid = wdata->cfile->pid;
2042                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2043                        rc = server->ops->async_writev(wdata);
2044                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2045
2046                for (i = 0; i < nr_pages; ++i)
2047                        unlock_page(wdata->pages[i]);
2048
2049                /* send failure -- clean up the mess */
2050                if (rc != 0) {
2051                        for (i = 0; i < nr_pages; ++i) {
2052                                if (rc == -EAGAIN)
2053                                        redirty_page_for_writepage(wbc,
2054                                                           wdata->pages[i]);
2055                                else
2056                                        SetPageError(wdata->pages[i]);
2057                                end_page_writeback(wdata->pages[i]);
2058                                page_cache_release(wdata->pages[i]);
2059                        }
2060                        if (rc != -EAGAIN)
2061                                mapping_set_error(mapping, rc);
2062                }
2063                kref_put(&wdata->refcount, cifs_writedata_release);
2064
2065                wbc->nr_to_write -= nr_pages;
2066                if (wbc->nr_to_write <= 0)
2067                        done = true;
2068
2069                index = next;
2070        }
2071
2072        if (!scanned && !done) {
2073                /*
2074                 * We hit the last page and there is more work to be done: wrap
2075                 * back to the start of the file
2076                 */
2077                scanned = true;
2078                index = 0;
2079                goto retry;
2080        }
2081
2082        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2083                mapping->writeback_index = index;
2084
2085        return rc;
2086}
2087
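    /*
     * Write a single locked page to the server, retrying -EAGAIN results
     * when the caller requires data integrity (WB_SYNC_ALL).
     */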
2088static int
2089cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2090{
2091        int rc;
2092        unsigned int xid;
2093
2094        xid = get_xid();
2095/* BB add check for wbc flags */
2096        page_cache_get(page);
2097        if (!PageUptodate(page))
2098                cifs_dbg(FYI, "ppw - page not up to date\n");
2099
2100        /*
2101         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2102         *
2103         * A writepage() implementation always needs to do either this,
2104         * or re-dirty the page with "redirty_page_for_writepage()" in
2105         * the case of a failure.
2106         *
2107         * Just unlocking the page will cause the radix tree tag-bits
2108         * to fail to update with the state of the page correctly.
2109         */
2110        set_page_writeback(page);
2111retry_write:
2112        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2113        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2114                goto retry_write;
2115        else if (rc == -EAGAIN)
2116                redirty_page_for_writepage(wbc, page);
2117        else if (rc != 0)
2118                SetPageError(page);
2119        else
2120                SetPageUptodate(page);
2121        end_page_writeback(page);
2122        page_cache_release(page);
2123        free_xid(xid);
2124        return rc;
2125}
2126
2127static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2128{
2129        int rc = cifs_writepage_locked(page, wbc);
2130        unlock_page(page);
2131        return rc;
2132}
2133
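    /*
     * ->write_end: commit data copied into the page cache. Pages that
     * are not fully up to date are written through to the server
     * immediately; up-to-date pages are simply marked dirty for later
     * writeback.
     */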
2134static int cifs_write_end(struct file *file, struct address_space *mapping,
2135                        loff_t pos, unsigned len, unsigned copied,
2136                        struct page *page, void *fsdata)
2137{
2138        int rc;
2139        struct inode *inode = mapping->host;
2140        struct cifsFileInfo *cfile = file->private_data;
2141        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2142        __u32 pid;
2143
2144        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2145                pid = cfile->pid;
2146        else
2147                pid = current->tgid;
2148
2149        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2150                 page, pos, copied);
2151
2152        if (PageChecked(page)) {
2153                if (copied == len)
2154                        SetPageUptodate(page);
2155                ClearPageChecked(page);
2156        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2157                SetPageUptodate(page);
2158
2159        if (!PageUptodate(page)) {
2160                char *page_data;
2161                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2162                unsigned int xid;
2163
2164                xid = get_xid();
2165                /* this is probably better than directly calling
2166                   partialpage_write since in this function the file handle
2167                   is known, which we might as well leverage */
2168                /* BB check if anything else missing out of ppw
2169                   such as updating last write time */
2170                page_data = kmap(page);
2171                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2172                /* if (rc < 0) should we set writebehind rc? */
2173                kunmap(page);
2174
2175                free_xid(xid);
2176        } else {
2177                rc = copied;
2178                pos += copied;
2179                set_page_dirty(page);
2180        }
2181
2182        if (rc > 0) {
2183                spin_lock(&inode->i_lock);
2184                if (pos > inode->i_size)
2185                        i_size_write(inode, pos);
2186                spin_unlock(&inode->i_lock);
2187        }
2188
2189        unlock_page(page);
2190        page_cache_release(page);
2191
2192        return rc;
2193}
2194
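    /*
     * fsync for strict cache mode: flush dirty pages, invalidate the
     * page cache if we no longer hold a read oplock, then ask the server
     * to flush the file unless the mount disables server-side syncs.
     */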
2195int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2196                      int datasync)
2197{
2198        unsigned int xid;
2199        int rc = 0;
2200        struct cifs_tcon *tcon;
2201        struct TCP_Server_Info *server;
2202        struct cifsFileInfo *smbfile = file->private_data;
2203        struct inode *inode = file_inode(file);
2204        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2205
2206        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2207        if (rc)
2208                return rc;
2209        mutex_lock(&inode->i_mutex);
2210
2211        xid = get_xid();
2212
2213        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2214                 file->f_path.dentry->d_name.name, datasync);
2215
2216        if (!CIFS_I(inode)->clientCanCacheRead) {
2217                rc = cifs_invalidate_mapping(inode);
2218                if (rc) {
2219                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2220                        rc = 0; /* don't care about it in fsync */
2221                }
2222        }
2223
2224        tcon = tlink_tcon(smbfile->tlink);
2225        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2226                server = tcon->ses->server;
2227                if (server->ops->flush)
2228                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2229                else
2230                        rc = -ENOSYS;
2231        }
2232
2233        free_xid(xid);
2234        mutex_unlock(&inode->i_mutex);
2235        return rc;
2236}
2237
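    /* Like cifs_strict_fsync() but without the page-cache invalidation step. */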
2238int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2239{
2240        unsigned int xid;
2241        int rc = 0;
2242        struct cifs_tcon *tcon;
2243        struct TCP_Server_Info *server;
2244        struct cifsFileInfo *smbfile = file->private_data;
2245        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2246        struct inode *inode = file->f_mapping->host;
2247
2248        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2249        if (rc)
2250                return rc;
2251        mutex_lock(&inode->i_mutex);
2252
2253        xid = get_xid();
2254
2255        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2256                 file->f_path.dentry->d_name.name, datasync);
2257
2258        tcon = tlink_tcon(smbfile->tlink);
2259        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2260                server = tcon->ses->server;
2261                if (server->ops->flush)
2262                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2263                else
2264                        rc = -ENOSYS;
2265        }
2266
2267        free_xid(xid);
2268        mutex_unlock(&inode->i_mutex);
2269        return rc;
2270}
2271
2272/*
2273 * As file closes, flush all cached write data for this inode checking
2274 * for write behind errors.
2275 */
2276int cifs_flush(struct file *file, fl_owner_t id)
2277{
2278        struct inode *inode = file_inode(file);
2279        int rc = 0;
2280
2281        if (file->f_mode & FMODE_WRITE)
2282                rc = filemap_write_and_wait(inode->i_mapping);
2283
2284        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2285
2286        return rc;
2287}
2288
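    /*
     * Allocate num_pages pages for an uncached write, releasing any
     * partial allocation on failure.
     */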
2289static int
2290cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2291{
2292        int rc = 0;
2293        unsigned long i;
2294
2295        for (i = 0; i < num_pages; i++) {
2296                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2297                if (!pages[i]) {
2298                        /*
2299                         * save the number of pages we have already allocated
2300                         * and return with an ENOMEM error
2301                         */
2302                        num_pages = i;
2303                        rc = -ENOMEM;
2304                        break;
2305                }
2306        }
2307
2308        if (rc) {
2309                for (i = 0; i < num_pages; i++)
2310                        put_page(pages[i]);
2311        }
2312        return rc;
2313}
2314
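    /*
     * Work out how many pages the next chunk of an uncached write needs:
     * the chunk is capped at wsize, and its byte length is returned
     * through cur_len.
     */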
2315static inline
2316size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2317{
2318        size_t num_pages;
2319        size_t clen;
2320
2321        clen = min_t(const size_t, len, wsize);
2322        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2323
2324        if (cur_len)
2325                *cur_len = clen;
2326
2327        return num_pages;
2328}
2329
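    /*
     * Completion work for an uncached write: update the cached server
     * EOF and i_size, wake the issuer, and drop the page and wdata
     * references (pages are kept for a possible resend on -EAGAIN).
     */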
2330static void
2331cifs_uncached_writev_complete(struct work_struct *work)
2332{
2333        int i;
2334        struct cifs_writedata *wdata = container_of(work,
2335                                        struct cifs_writedata, work);
2336        struct inode *inode = wdata->cfile->dentry->d_inode;
2337        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2338
2339        spin_lock(&inode->i_lock);
2340        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2341        if (cifsi->server_eof > inode->i_size)
2342                i_size_write(inode, cifsi->server_eof);
2343        spin_unlock(&inode->i_lock);
2344
2345        complete(&wdata->done);
2346
2347        if (wdata->result != -EAGAIN) {
2348                for (i = 0; i < wdata->nr_pages; i++)
2349                        put_page(wdata->pages[i]);
2350        }
2351
2352        kref_put(&wdata->refcount, cifs_writedata_release);
2353}
2354
2355/* attempt to send the write to the server, retrying on any -EAGAIN errors */
2356static int
2357cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2358{
2359        int rc;
2360        struct TCP_Server_Info *server;
2361
2362        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2363
2364        do {
2365                if (wdata->cfile->invalidHandle) {
2366                        rc = cifs_reopen_file(wdata->cfile, false);
2367                        if (rc != 0)
2368                                continue;
2369                }
2370                rc = server->ops->async_writev(wdata);
2371        } while (rc == -EAGAIN);
2372
2373        return rc;
2374}
2375
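    /*
     * Uncached write path: copy the user iovec into freshly allocated
     * pages, issue async writes of up to wsize bytes each, then collect
     * the replies in order of increasing offset, resending on -EAGAIN.
     */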
2376static ssize_t
2377cifs_iovec_write(struct file *file, const struct iovec *iov,
2378                 unsigned long nr_segs, loff_t *poffset)
2379{
2380        unsigned long nr_pages, i;
2381        size_t copied, len, cur_len;
2382        ssize_t total_written = 0;
2383        loff_t offset;
2384        struct iov_iter it;
2385        struct cifsFileInfo *open_file;
2386        struct cifs_tcon *tcon;
2387        struct cifs_sb_info *cifs_sb;
2388        struct cifs_writedata *wdata, *tmp;
2389        struct list_head wdata_list;
2390        int rc;
2391        pid_t pid;
2392
2393        len = iov_length(iov, nr_segs);
2394        if (!len)
2395                return 0;
2396
2397        rc = generic_write_checks(file, poffset, &len, 0);
2398        if (rc)
2399                return rc;
2400
2401        INIT_LIST_HEAD(&wdata_list);
2402        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2403        open_file = file->private_data;
2404        tcon = tlink_tcon(open_file->tlink);
2405
2406        if (!tcon->ses->server->ops->async_writev)
2407                return -ENOSYS;
2408
2409        offset = *poffset;
2410
2411        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2412                pid = open_file->pid;
2413        else
2414                pid = current->tgid;
2415
2416        iov_iter_init(&it, iov, nr_segs, len, 0);
2417        do {
2418                size_t save_len;
2419
2420                nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2421                wdata = cifs_writedata_alloc(nr_pages,
2422                                             cifs_uncached_writev_complete);
2423                if (!wdata) {
2424                        rc = -ENOMEM;
2425                        break;
2426                }
2427
2428                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2429                if (rc) {
2430                        kfree(wdata);
2431                        break;
2432                }
2433
2434                save_len = cur_len;
2435                for (i = 0; i < nr_pages; i++) {
2436                        copied = min_t(const size_t, cur_len, PAGE_SIZE);
2437                        copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2438                                                         0, copied);
2439                        cur_len -= copied;
2440                        iov_iter_advance(&it, copied);
2441                }
2442                cur_len = save_len - cur_len;
2443
2444                wdata->sync_mode = WB_SYNC_ALL;
2445                wdata->nr_pages = nr_pages;
2446                wdata->offset = (__u64)offset;
2447                wdata->cfile = cifsFileInfo_get(open_file);
2448                wdata->pid = pid;
2449                wdata->bytes = cur_len;
2450                wdata->pagesz = PAGE_SIZE;
2451                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2452                rc = cifs_uncached_retry_writev(wdata);
2453                if (rc) {
2454                        kref_put(&wdata->refcount, cifs_writedata_release);
2455                        break;
2456                }
2457
2458                list_add_tail(&wdata->list, &wdata_list);
2459                offset += cur_len;
2460                len -= cur_len;
2461        } while (len > 0);
2462
2463        /*
2464         * If at least one write was successfully sent, then discard any rc
2465         * value from the later writes. If a later write succeeds, then
2466         * we'll end up returning whatever was written. If it fails, then
2467         * we'll get a new rc value from that.
2468         */
2469        if (!list_empty(&wdata_list))
2470                rc = 0;
2471
2472        /*
2473         * Wait for and collect replies for any successful sends in order of
2474         * increasing offset. Once an error is hit or we get a fatal signal
2475         * while waiting, then return without waiting for any more replies.
2476         */
2477restart_loop:
2478        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2479                if (!rc) {
2480                        /* FIXME: freezable too? */
2481                        rc = wait_for_completion_killable(&wdata->done);
2482                        if (rc)
2483                                rc = -EINTR;
2484                        else if (wdata->result)
2485                                rc = wdata->result;
2486                        else
2487                                total_written += wdata->bytes;
2488
2489                        /* resend call if it's a retryable error */
2490                        if (rc == -EAGAIN) {
2491                                rc = cifs_uncached_retry_writev(wdata);
2492                                goto restart_loop;
2493                        }
2494                }
2495                list_del_init(&wdata->list);
2496                kref_put(&wdata->refcount, cifs_writedata_release);
2497        }
2498
2499        if (total_written > 0)
2500                *poffset += total_written;
2501
2502        cifs_stats_bytes_written(tcon, total_written);
2503        return total_written ? total_written : (ssize_t)rc;
2504}
2505
2506ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2507                                unsigned long nr_segs, loff_t pos)
2508{
2509        ssize_t written;
2510        struct inode *inode;
2511
2512        inode = file_inode(iocb->ki_filp);
2513
2514        /*
2515         * BB - optimize the path when signing is disabled. We can drop this
2516         * extra memory-to-memory copying and use the iovec buffers directly
2517         * when constructing the write request.
2518         */
2519
2520        written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2521        if (written > 0) {
2522                CIFS_I(inode)->invalid_mapping = true;
2523                iocb->ki_pos = pos;
2524        }
2525
2526        return written;
2527}
2528
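    /*
     * Cached write for oplocked files: hold lock_sem shared so nobody
     * adds a conflicting brlock while the write is in flight, then use
     * the generic page-cache write path.
     */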
2529static ssize_t
2530cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2531            unsigned long nr_segs, loff_t pos)
2532{
2533        struct file *file = iocb->ki_filp;
2534        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2535        struct inode *inode = file->f_mapping->host;
2536        struct cifsInodeInfo *cinode = CIFS_I(inode);
2537        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2538        ssize_t rc = -EACCES;
2539
2540        BUG_ON(iocb->ki_pos != pos);
2541
2542        /*
2543         * We need to hold the sem to be sure nobody modifies the lock
2544         * list with a brlock that prevents writing.
2545         */
2546        down_read(&cinode->lock_sem);
2547        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2548                                     server->vals->exclusive_lock_type, NULL,
2549                                     CIFS_WRITE_OP)) {
2550                mutex_lock(&inode->i_mutex);
2551                rc = __generic_file_aio_write(iocb, iov, nr_segs,
2552                                               &iocb->ki_pos);
2553                mutex_unlock(&inode->i_mutex);
2554        }
2555
2556        if (rc > 0 || rc == -EIOCBQUEUED) {
2557                ssize_t err;
2558
2559                err = generic_write_sync(file, pos, rc);
2560                if (err < 0 && rc > 0)
2561                        rc = err;
2562        }
2563
2564        up_read(&cinode->lock_sem);
2565        return rc;
2566}
2567
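    /*
     * Strict cache mode write: use the cached path only while we hold an
     * exclusive oplock, otherwise write the data through to the server
     * and drop any remaining read oplock to avoid stale cached reads.
     */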
2568ssize_t
2569cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2570                   unsigned long nr_segs, loff_t pos)
2571{
2572        struct inode *inode = file_inode(iocb->ki_filp);
2573        struct cifsInodeInfo *cinode = CIFS_I(inode);
2574        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2575        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2576                                                iocb->ki_filp->private_data;
2577        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2578        ssize_t written;
2579
2580        if (cinode->clientCanCacheAll) {
2581                if (cap_unix(tcon->ses) &&
2582                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2583                    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2584                        return generic_file_aio_write(iocb, iov, nr_segs, pos);
2585                return cifs_writev(iocb, iov, nr_segs, pos);
2586        }
2587        /*
2588         * For non-oplocked files in strict cache mode we need to write the data
2589         * to the server exactly from pos to pos+len-1 rather than flush all
2590         * affected pages because it may cause an error with mandatory locks on
2591         * these pages but not on the region from pos to pos+len-1.
2592         */
2593        written = cifs_user_writev(iocb, iov, nr_segs, pos);
2594        if (written > 0 && cinode->clientCanCacheRead) {
2595                /*
2596                 * A Windows 7 server can delay breaking a level2 oplock when a
2597                 * write request comes in - break it on the client to prevent
2598                 * reading stale data.
2599                 */
2600                cifs_invalidate_mapping(inode);
2601                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2602                         inode);
2603                cinode->clientCanCacheRead = false;
2604        }
2605        return written;
2606}
2607
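    /* Allocate a readdata struct with room for nr_pages page pointers. */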
2608static struct cifs_readdata *
2609cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2610{
2611        struct cifs_readdata *rdata;
2612
2613        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2614                        GFP_KERNEL);
2615        if (rdata != NULL) {
2616                kref_init(&rdata->refcount);
2617                INIT_LIST_HEAD(&rdata->list);
2618                init_completion(&rdata->done);
2619                INIT_WORK(&rdata->work, complete);
2620        }
2621
2622        return rdata;
2623}
2624
2625void
2626cifs_readdata_release(struct kref *refcount)
2627{
2628        struct cifs_readdata *rdata = container_of(refcount,
2629                                        struct cifs_readdata, refcount);
2630
2631        if (rdata->cfile)
2632                cifsFileInfo_put(rdata->cfile);
2633
2634        kfree(rdata);
2635}
2636
2637static int
2638cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2639{
2640        int rc = 0;
2641        struct page *page;
2642        unsigned int i;
2643
2644        for (i = 0; i < nr_pages; i++) {
2645                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2646                if (!page) {
2647                        rc = -ENOMEM;
2648                        break;
2649                }
2650                rdata->pages[i] = page;
2651        }
2652
2653        if (rc) {
2654                for (i = 0; i < nr_pages; i++) {
2655                        put_page(rdata->pages[i]);
2656                        rdata->pages[i] = NULL;
2657                }
2658        }
2659        return rc;
2660}
2661
2662static void
2663cifs_uncached_readdata_release(struct kref *refcount)
2664{
2665        struct cifs_readdata *rdata = container_of(refcount,
2666                                        struct cifs_readdata, refcount);
2667        unsigned int i;
2668
2669        for (i = 0; i < rdata->nr_pages; i++) {
2670                put_page(rdata->pages[i]);
2671                rdata->pages[i] = NULL;
2672        }
2673        cifs_readdata_release(refcount);
2674}
2675
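    /*
     * Issue an async read, reopening the file handle first if it was
     * invalidated, and retrying on -EAGAIN.
     */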
2676static int
2677cifs_retry_async_readv(struct cifs_readdata *rdata)
2678{
2679        int rc;
2680        struct TCP_Server_Info *server;
2681
2682        server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2683
2684        do {
2685                if (rdata->cfile->invalidHandle) {
2686                        rc = cifs_reopen_file(rdata->cfile, true);
2687                        if (rc != 0)
2688                                continue;
2689                }
2690                rc = server->ops->async_readv(rdata);
2691        } while (rc == -EAGAIN);
2692
2693        return rc;
2694}
2695
2696/**
2697 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2698 * @rdata:      the readdata response with list of pages holding data
2699 * @iov:        vector in which we should copy the data
2700 * @nr_segs:    number of segments in vector
2701 * @offset:     offset into file of the first iovec
2702 * @copied:     used to return the amount of data copied to the iov
2703 *
2704 * This function copies data from a list of pages in a readdata response into
2705 * an array of iovecs. It will first calculate where the data should go
2706 * based on the info in the readdata and then copy the data into that spot.
2707 */
2708static ssize_t
2709cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2710                        unsigned long nr_segs, loff_t offset, ssize_t *copied)
2711{
2712        int rc = 0;
2713        struct iov_iter ii;
2714        size_t pos = rdata->offset - offset;
2715        ssize_t remaining = rdata->bytes;
2716        unsigned char *pdata;
2717        unsigned int i;
2718
2719        /* set up iov_iter and advance to the correct offset */
2720        iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2721        iov_iter_advance(&ii, pos);
2722
2723        *copied = 0;
2724        for (i = 0; i < rdata->nr_pages; i++) {
2725                ssize_t copy;
2726                struct page *page = rdata->pages[i];
2727
2728                /* copy a whole page or whatever's left */
2729                copy = min_t(ssize_t, remaining, PAGE_SIZE);
2730
2731                /* ...but limit it to whatever space is left in the iov */
2732                copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2733
2734                /* go while there's data to be copied and no errors */
2735                if (copy && !rc) {
2736                        pdata = kmap(page);
2737                        rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2738                                                (int)copy);
2739                        kunmap(page);
2740                        if (!rc) {
2741                                *copied += copy;
2742                                remaining -= copy;
2743                                iov_iter_advance(&ii, copy);
2744                        }
2745                }
2746        }
2747
2748        return rc;
2749}
2750
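    /* Completion work for an uncached read: wake the waiter and drop our reference. */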
2751static void
2752cifs_uncached_readv_complete(struct work_struct *work)
2753{
2754        struct cifs_readdata *rdata = container_of(work,
2755                                                struct cifs_readdata, work);
2756
2757        complete(&rdata->done);
2758        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2759}
2760
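    /*
     * Receive len bytes from the socket into rdata's pages, zero-filling
     * the tail of the last partial page and releasing any pages beyond
     * the returned data.
     */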
2761static int
2762cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2763                        struct cifs_readdata *rdata, unsigned int len)
2764{
2765        int total_read = 0, result = 0;
2766        unsigned int i;
2767        unsigned int nr_pages = rdata->nr_pages;
2768        struct kvec iov;
2769
2770        rdata->tailsz = PAGE_SIZE;
2771        for (i = 0; i < nr_pages; i++) {
2772                struct page *page = rdata->pages[i];
2773
2774                if (len >= PAGE_SIZE) {
2775                        /* enough data to fill the page */
2776                        iov.iov_base = kmap(page);
2777                        iov.iov_len = PAGE_SIZE;
2778                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2779                                 i, iov.iov_base, iov.iov_len);
2780                        len -= PAGE_SIZE;
2781                } else if (len > 0) {
2782                        /* enough for partial page, fill and zero the rest */
2783                        iov.iov_base = kmap(page);
2784                        iov.iov_len = len;
2785                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2786                                 i, iov.iov_base, iov.iov_len);
2787                        memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2788                        rdata->tailsz = len;
2789                        len = 0;
2790                } else {
2791                        /* no need to hold page hostage */
2792                        rdata->pages[i] = NULL;
2793                        rdata->nr_pages--;
2794                        put_page(page);
2795                        continue;
2796                }
2797
2798                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2799                kunmap(page);
2800                if (result < 0)
2801                        break;
2802
2803                total_read += result;
2804        }
2805
2806        return total_read > 0 ? total_read : result;
2807}
2808
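    /*
     * Uncached read path: issue async reads of up to rsize bytes each,
     * then wait for the replies in order of increasing offset and copy
     * the returned pages into the user iovec, resending on -EAGAIN.
     */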
2809static ssize_t
2810cifs_iovec_read(struct file *file, const struct iovec *iov,
2811                 unsigned long nr_segs, loff_t *poffset)
2812{
2813        ssize_t rc;
2814        size_t len, cur_len;
2815        ssize_t total_read = 0;
2816        loff_t offset = *poffset;
2817        unsigned int npages;
2818        struct cifs_sb_info *cifs_sb;
2819        struct cifs_tcon *tcon;
2820        struct cifsFileInfo *open_file;
2821        struct cifs_readdata *rdata, *tmp;
2822        struct list_head rdata_list;
2823        pid_t pid;
2824
2825        if (!nr_segs)
2826                return 0;
2827
2828        len = iov_length(iov, nr_segs);
2829        if (!len)
2830                return 0;
2831
2832        INIT_LIST_HEAD(&rdata_list);
2833        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2834        open_file = file->private_data;
2835        tcon = tlink_tcon(open_file->tlink);
2836
2837        if (!tcon->ses->server->ops->async_readv)
2838                return -ENOSYS;
2839
2840        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2841                pid = open_file->pid;
2842        else
2843                pid = current->tgid;
2844
2845        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2846                cifs_dbg(FYI, "attempting read on write only file instance\n");
2847
2848        do {
2849                cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2850                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2851
2852                /* allocate a readdata struct */
2853                rdata = cifs_readdata_alloc(npages,
2854                                            cifs_uncached_readv_complete);
2855                if (!rdata) {
2856                        rc = -ENOMEM;
2857                        break;
2858                }
2859
2860                rc = cifs_read_allocate_pages(rdata, npages);
2861                if (rc)
2862                        goto error;
2863
2864                rdata->cfile = cifsFileInfo_get(open_file);
2865                rdata->nr_pages = npages;
2866                rdata->offset = offset;
2867                rdata->bytes = cur_len;
2868                rdata->pid = pid;
2869                rdata->pagesz = PAGE_SIZE;
2870                rdata->read_into_pages = cifs_uncached_read_into_pages;
2871
2872                rc = cifs_retry_async_readv(rdata);
2873error:
2874                if (rc) {
2875                        kref_put(&rdata->refcount,
2876                                 cifs_uncached_readdata_release);
2877                        break;
2878                }
2879
2880                list_add_tail(&rdata->list, &rdata_list);
2881                offset += cur_len;
2882                len -= cur_len;
2883        } while (len > 0);
2884
2885        /* if at least one read request was sent successfully, reset rc */
2886        if (!list_empty(&rdata_list))
2887                rc = 0;
2888
2889        /* the loop below should proceed in the order of increasing offsets */
2890restart_loop:
2891        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2892                if (!rc) {
2893                        ssize_t copied;
2894
2895                        /* FIXME: freezable sleep too? */
2896                        rc = wait_for_completion_killable(&rdata->done);
2897                        if (rc)
2898                                rc = -EINTR;
2899                        else if (rdata->result)
2900                                rc = rdata->result;
2901                        else {
2902                                rc = cifs_readdata_to_iov(rdata, iov,
2903                                                        nr_segs, *poffset,
2904                                                        &copied);
2905                                total_read += copied;
2906                        }
2907
2908                        /* resend call if it's a retryable error */
2909                        if (rc == -EAGAIN) {
2910                                rc = cifs_retry_async_readv(rdata);
2911                                goto restart_loop;
2912                        }
2913                }
2914                list_del_init(&rdata->list);
2915                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2916        }
2917
2918        cifs_stats_bytes_read(tcon, total_read);
2919        *poffset += total_read;
2920
2921        /* mask nodata case */
2922        if (rc == -ENODATA)
2923                rc = 0;
2924
2925        return total_read ? total_read : rc;
2926}
2927
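/*
 * cifs_iovec_read() above has a common async shape: queue one rdata per
 * rsize-sized chunk, then reap completions in offset order.  The reap
 * step, reduced to its core (error and -EAGAIN handling trimmed):
 *
 *	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
 *		rc = wait_for_completion_killable(&rdata->done);
 *		if (!rc && !rdata->result)
 *			rc = cifs_readdata_to_iov(rdata, iov, nr_segs,
 *						  *poffset, &copied);
 *		list_del_init(&rdata->list);
 *		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 *	}
 *
 * The kref_put() pairs with the reference taken at allocation time, so
 * the rdata lives until both the sender and this reaper are done with it.
 */
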
2928ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2929                               unsigned long nr_segs, loff_t pos)
2930{
2931        ssize_t read;
2932
2933        read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2934        if (read > 0)
2935                iocb->ki_pos = pos;
2936
2937        return read;
2938}
2939
2940ssize_t
2941cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2942                  unsigned long nr_segs, loff_t pos)
2943{
2944        struct inode *inode = file_inode(iocb->ki_filp);
2945        struct cifsInodeInfo *cinode = CIFS_I(inode);
2946        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2947        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2948                                                iocb->ki_filp->private_data;
2949        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2950        int rc = -EACCES;
2951
2952        /*
2953         * In strict cache mode we need to read from the server every time
2954         * if we don't have a level II oplock, because the server can delay
2955         * the mtime change and so we can't decide whether to invalidate
2956         * the inode. Page reading can also fail if there are mandatory
2957         * locks on pages affected by this read but not on the region from
2958         * pos to pos+len-1.
2959         */
2960        if (!cinode->clientCanCacheRead)
2961                return cifs_user_readv(iocb, iov, nr_segs, pos);
2962
2963        if (cap_unix(tcon->ses) &&
2964            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2965            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2966                return generic_file_aio_read(iocb, iov, nr_segs, pos);
2967
2968        /*
2969         * We need to hold the sem to be sure nobody modifies lock list
2970         * with a brlock that prevents reading.
2971         */
2972        down_read(&cinode->lock_sem);
2973        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2974                                     tcon->ses->server->vals->shared_lock_type,
2975                                     NULL, CIFS_READ_OP))
2976                rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2977        up_read(&cinode->lock_sem);
2978        return rc;
2979}
2980
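/*
 * Decision path taken by cifs_strict_readv() above, in order:
 *
 *	no level II oplock        -> cifs_user_readv() (uncached, from server)
 *	POSIX byte-range locks    -> generic_file_aio_read() (cached)
 *	mandatory locks possible  -> cached read only if
 *	                             cifs_find_lock_conflict() finds nothing,
 *	                             with lock_sem held shared across both the
 *	                             check and the read
 */
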
2981static ssize_t
2982cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2983{
2984        int rc = -EACCES;
2985        unsigned int bytes_read = 0;
2986        unsigned int total_read;
2987        unsigned int current_read_size;
2988        unsigned int rsize;
2989        struct cifs_sb_info *cifs_sb;
2990        struct cifs_tcon *tcon;
2991        struct TCP_Server_Info *server;
2992        unsigned int xid;
2993        char *cur_offset;
2994        struct cifsFileInfo *open_file;
2995        struct cifs_io_parms io_parms;
2996        int buf_type = CIFS_NO_BUFFER;
2997        __u32 pid;
2998
2999        xid = get_xid();
3000        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3001
3002        /* FIXME: set up handlers for larger reads and/or convert to async */
3003        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3004
3005        if (file->private_data == NULL) {
3006                rc = -EBADF;
3007                free_xid(xid);
3008                return rc;
3009        }
3010        open_file = file->private_data;
3011        tcon = tlink_tcon(open_file->tlink);
3012        server = tcon->ses->server;
3013
3014        if (!server->ops->sync_read) {
3015                free_xid(xid);
3016                return -ENOSYS;
3017        }
3018
3019        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3020                pid = open_file->pid;
3021        else
3022                pid = current->tgid;
3023
3024        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3025                cifs_dbg(FYI, "attempting read on write only file instance\n");
3026
3027        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3028             total_read += bytes_read, cur_offset += bytes_read) {
3029                current_read_size = min_t(uint, read_size - total_read, rsize);
3030                /*
3031                 * For Windows ME and 9x do not request more than the
3032                 * negotiated size, since the server will refuse the read.
3033                 */
3034                if ((tcon->ses) && !(tcon->ses->capabilities &
3035                                tcon->ses->server->vals->cap_large_files)) {
3036                        current_read_size = min_t(uint, current_read_size,
3037                                        CIFSMaxBufSize);
3038                }
3039                rc = -EAGAIN;
3040                while (rc == -EAGAIN) {
3041                        if (open_file->invalidHandle) {
3042                                rc = cifs_reopen_file(open_file, true);
3043                                if (rc != 0)
3044                                        break;
3045                        }
3046                        io_parms.pid = pid;
3047                        io_parms.tcon = tcon;
3048                        io_parms.offset = *offset;
3049                        io_parms.length = current_read_size;
3050                        rc = server->ops->sync_read(xid, open_file, &io_parms,
3051                                                    &bytes_read, &cur_offset,
3052                                                    &buf_type);
3053                }
3054                if (rc || (bytes_read == 0)) {
3055                        if (total_read) {
3056                                break;
3057                        } else {
3058                                free_xid(xid);
3059                                return rc;
3060                        }
3061                } else {
3062                        cifs_stats_bytes_read(tcon, total_read);
3063                        *offset += bytes_read;
3064                }
3065        }
3066        free_xid(xid);
3067        return total_read;
3068}
3069
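/*
 * The inner while loop of cifs_read() is the standard synchronous retry
 * shape used throughout this file: a reconnect surfaces as -EAGAIN, the
 * now-invalid handle is reopened, and the request is reissued.  Sketch
 * (issue_request is a stand-in for any ->sync_read-style server call):
 *
 *	rc = -EAGAIN;
 *	while (rc == -EAGAIN) {
 *		if (open_file->invalidHandle) {
 *			rc = cifs_reopen_file(open_file, true);
 *			if (rc != 0)
 *				break;
 *		}
 *		rc = issue_request(xid, open_file, &io_parms);
 *	}
 */
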
3070/*
3071 * If the page is mmap'ed into a process' page tables, then we need to make
3072 * sure that it doesn't change while being written back.
3073 */
3074static int
3075cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3076{
3077        struct page *page = vmf->page;
3078
3079        lock_page(page);
3080        return VM_FAULT_LOCKED;
3081}
3082
3083static const struct vm_operations_struct cifs_file_vm_ops = {
3084        .fault = filemap_fault,
3085        .page_mkwrite = cifs_page_mkwrite,
3086        .remap_pages = generic_file_remap_pages,
3087};
3088
3089int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3090{
3091        int rc, xid;
3092        struct inode *inode = file_inode(file);
3093
3094        xid = get_xid();
3095
3096        if (!CIFS_I(inode)->clientCanCacheRead) {
3097                rc = cifs_invalidate_mapping(inode);
3098                if (rc) {
                            free_xid(xid);
3099                        return rc;
                    }
3100        }
3101
3102        rc = generic_file_mmap(file, vma);
3103        if (rc == 0)
3104                vma->vm_ops = &cifs_file_vm_ops;
3105        free_xid(xid);
3106        return rc;
3107}
3108
3109int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3110{
3111        int rc, xid;
3112
3113        xid = get_xid();
3114        rc = cifs_revalidate_file(file);
3115        if (rc) {
3116                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3117                         rc);
3118                free_xid(xid);
3119                return rc;
3120        }
3121        rc = generic_file_mmap(file, vma);
3122        if (rc == 0)
3123                vma->vm_ops = &cifs_file_vm_ops;
3124        free_xid(xid);
3125        return rc;
3126}
3127
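/*
 * These two entry points are selected per mount elsewhere (in cifsfs.c):
 * strict cache mounts use cifs_file_strict_mmap so that cached pages are
 * revalidated or dropped before user mappings see them.  Abridged sketch
 * of that wiring:
 *
 *	const struct file_operations cifs_file_strict_ops = {
 *		...
 *		.mmap = cifs_file_strict_mmap,
 *		...
 *	};
 */
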
3128static void
3129cifs_readv_complete(struct work_struct *work)
3130{
3131        unsigned int i;
3132        struct cifs_readdata *rdata = container_of(work,
3133                                                struct cifs_readdata, work);
3134
3135        for (i = 0; i < rdata->nr_pages; i++) {
3136                struct page *page = rdata->pages[i];
3137
3138                lru_cache_add_file(page);
3139
3140                if (rdata->result == 0) {
3141                        flush_dcache_page(page);
3142                        SetPageUptodate(page);
3143                }
3144
3145                unlock_page(page);
3146
3147                if (rdata->result == 0)
3148                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3149
3150                page_cache_release(page);
3151                rdata->pages[i] = NULL;
3152        }
3153        kref_put(&rdata->refcount, cifs_readdata_release);
3154}
3155
3156static int
3157cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3158                        struct cifs_readdata *rdata, unsigned int len)
3159{
3160        int total_read = 0, result = 0;
3161        unsigned int i;
3162        u64 eof;
3163        pgoff_t eof_index;
3164        unsigned int nr_pages = rdata->nr_pages;
3165        struct kvec iov;
3166
3167        /* determine the eof that the server (probably) has */
3168        eof = CIFS_I(rdata->mapping->host)->server_eof;
3169        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3170        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3171
3172        rdata->tailsz = PAGE_CACHE_SIZE;
3173        for (i = 0; i < nr_pages; i++) {
3174                struct page *page = rdata->pages[i];
3175
3176                if (len >= PAGE_CACHE_SIZE) {
3177                        /* enough data to fill the page */
3178                        iov.iov_base = kmap(page);
3179                        iov.iov_len = PAGE_CACHE_SIZE;
3180                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3181                                 i, page->index, iov.iov_base, iov.iov_len);
3182                        len -= PAGE_CACHE_SIZE;
3183                } else if (len > 0) {
3184                        /* enough for partial page, fill and zero the rest */
3185                        iov.iov_base = kmap(page);
3186                        iov.iov_len = len;
3187                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3188                                 i, page->index, iov.iov_base, iov.iov_len);
3189                        memset(iov.iov_base + len,
3190                                '\0', PAGE_CACHE_SIZE - len);
3191                        rdata->tailsz = len;
3192                        len = 0;
3193                } else if (page->index > eof_index) {
3194                        /*
3195                         * The VFS will not try to do readahead past the
3196                         * i_size, but it's possible that we have outstanding
3197                         * writes with gaps in the middle and the i_size hasn't
3198                         * caught up yet. Populate those with zeroed out pages
3199                         * to prevent the VFS from repeatedly attempting to
3200                         * fill them until the writes are flushed.
3201                         */
3202                        zero_user(page, 0, PAGE_CACHE_SIZE);
3203                        lru_cache_add_file(page);
3204                        flush_dcache_page(page);
3205                        SetPageUptodate(page);
3206                        unlock_page(page);
3207                        page_cache_release(page);
3208                        rdata->pages[i] = NULL;
3209                        rdata->nr_pages--;
3210                        continue;
3211                } else {
3212                        /* no need to hold page hostage */
3213                        lru_cache_add_file(page);
3214                        unlock_page(page);
3215                        page_cache_release(page);
3216                        rdata->pages[i] = NULL;
3217                        rdata->nr_pages--;
3218                        continue;
3219                }
3220
3221                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3222                kunmap(page);
3223                if (result < 0)
3224                        break;
3225
3226                total_read += result;
3227        }
3228
3229        return total_read > 0 ? total_read : result;
3230}
3231
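/*
 * Worked example for the eof_index logic above, with 4K pages: if the
 * server's eof is believed to be 9000, then
 *
 *	eof_index = (9000 - 1) >> PAGE_CACHE_SHIFT = 8999 >> 12 = 2
 *
 * so pages 0-2 may contain data, while any requested page with index > 2
 * is zero-filled and marked up to date without a wire read.
 */
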
3232static int cifs_readpages(struct file *file, struct address_space *mapping,
3233        struct list_head *page_list, unsigned num_pages)
3234{
3235        int rc;
3236        struct list_head tmplist;
3237        struct cifsFileInfo *open_file = file->private_data;
3238        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3239        unsigned int rsize = cifs_sb->rsize;
3240        pid_t pid;
3241
3242        /*
3243         * Give up immediately if rsize is too small to read an entire page.
3244         * The VFS will fall back to readpage. We should never reach this
3245         * point, however, since we set ra_pages to 0 when the rsize is
3246         * smaller than a cache page.
3247         */
3248        if (unlikely(rsize < PAGE_CACHE_SIZE))
3249                return 0;
3250
3251        /*
3252         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3253         * immediately if the cookie is negative.
3254         */
3255        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3256                                         &num_pages);
3257        if (rc == 0)
3258                return rc;
3259
3260        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3261                pid = open_file->pid;
3262        else
3263                pid = current->tgid;
3264
3265        rc = 0;
3266        INIT_LIST_HEAD(&tmplist);
3267
3268        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3269                 __func__, file, mapping, num_pages);
3270
3271        /*
3272         * Start with the page at end of list and move it to private
3273         * list. Do the same with any following pages until we hit
3274         * the rsize limit, hit an index discontinuity, or run out of
3275         * pages. Issue the async read and then start the loop again
3276         * until the list is empty.
3277         *
3278         * Note that list order is important: the page_list arrives in
3279         * order of declining index, but we want the pages placed in
3280         * rdata->pages in increasing index order.
3281         */
3282        while (!list_empty(page_list)) {
3283                unsigned int i;
3284                unsigned int bytes = PAGE_CACHE_SIZE;
3285                unsigned int expected_index;
3286                unsigned int nr_pages = 1;
3287                loff_t offset;
3288                struct page *page, *tpage;
3289                struct cifs_readdata *rdata;
3290
3291                page = list_entry(page_list->prev, struct page, lru);
3292
3293                /*
3294                 * Lock the page and put it in the cache. Since no one else
3295                 * should have access to this page, we're safe to simply set
3296                 * PG_locked without checking it first.
3297                 */
3298                __set_page_locked(page);
3299                rc = add_to_page_cache_locked(page, mapping,
3300                                              page->index, GFP_KERNEL);
3301
3302                /* give up if we can't stick it in the cache */
3303                if (rc) {
3304                        __clear_page_locked(page);
3305                        break;
3306                }
3307
3308                /* move first page to the tmplist */
3309                offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3310                list_move_tail(&page->lru, &tmplist);
3311
3312                /* now try and add more pages onto the request */
3313                expected_index = page->index + 1;
3314                list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3315                        /* discontinuity ? */
3316                        if (page->index != expected_index)
3317                                break;
3318
3319                        /* would this page push the read over the rsize? */
3320                        if (bytes + PAGE_CACHE_SIZE > rsize)
3321                                break;
3322
3323                        __set_page_locked(page);
3324                        if (add_to_page_cache_locked(page, mapping,
3325                                                page->index, GFP_KERNEL)) {
3326                                __clear_page_locked(page);
3327                                break;
3328                        }
3329                        list_move_tail(&page->lru, &tmplist);
3330                        bytes += PAGE_CACHE_SIZE;
3331                        expected_index++;
3332                        nr_pages++;
3333                }
3334
3335                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3336                if (!rdata) {
3337                        /* best to give up if we're out of mem */
3338                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3339                                list_del(&page->lru);
3340                                lru_cache_add_file(page);
3341                                unlock_page(page);
3342                                page_cache_release(page);
3343                        }
3344                        rc = -ENOMEM;
3345                        break;
3346                }
3347
3348                rdata->cfile = cifsFileInfo_get(open_file);
3349                rdata->mapping = mapping;
3350                rdata->offset = offset;
3351                rdata->bytes = bytes;
3352                rdata->pid = pid;
3353                rdata->pagesz = PAGE_CACHE_SIZE;
3354                rdata->read_into_pages = cifs_readpages_read_into_pages;
3355
3356                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3357                        list_del(&page->lru);
3358                        rdata->pages[rdata->nr_pages++] = page;
3359                }
3360
3361                rc = cifs_retry_async_readv(rdata);
3362                if (rc != 0) {
3363                        for (i = 0; i < rdata->nr_pages; i++) {
3364                                page = rdata->pages[i];
3365                                lru_cache_add_file(page);
3366                                unlock_page(page);
3367                                page_cache_release(page);
3368                        }
3369                        kref_put(&rdata->refcount, cifs_readdata_release);
3370                        break;
3371                }
3372
3373                kref_put(&rdata->refcount, cifs_readdata_release);
3374        }
3375
3376        return rc;
3377}
3378
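/*
 * Example of the batching rules in cifs_readpages() above, with 4K pages
 * and rsize = 16384: a batch may grow to at most four contiguous pages
 * (bytes + PAGE_CACHE_SIZE > rsize stops it), and any hole in the index
 * sequence also stops it.  So a page_list covering indexes 5, 6, 8 and 9
 * is issued as two async reads: one rdata for {5, 6} and one for {8, 9}.
 */
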
3379static int cifs_readpage_worker(struct file *file, struct page *page,
3380        loff_t *poffset)
3381{
3382        char *read_data;
3383        int rc;
3384
3385        /* Is the page cached? */
3386        rc = cifs_readpage_from_fscache(file_inode(file), page);
3387        if (rc == 0)
3388                goto read_complete;
3389
3390        page_cache_get(page);
3391        read_data = kmap(page);
3392        /* for reads over a certain size we could initiate async read-ahead */
3393
3394        rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3395
3396        if (rc < 0)
3397                goto io_error;
3398        else
3399                cifs_dbg(FYI, "Bytes read %d\n", rc);
3400
3401        file_inode(file)->i_atime =
3402                current_fs_time(file_inode(file)->i_sb);
3403
3404        if (PAGE_CACHE_SIZE > rc)
3405                memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3406
3407        flush_dcache_page(page);
3408        SetPageUptodate(page);
3409
3410        /* send this page to the cache */
3411        cifs_readpage_to_fscache(file_inode(file), page);
3412
3413        rc = 0;
3414
3415io_error:
3416        kunmap(page);
3417        page_cache_release(page);
3418
3419read_complete:
3420        return rc;
3421}
3422
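/*
 * Short reads are expected in cifs_readpage_worker() above when EOF falls
 * inside the page: if cifs_read() returns 500 for a 4096-byte page, the
 * remaining 3596 bytes are memset() to zero before SetPageUptodate(), so
 * the tail of the page never exposes stale data to userspace.
 */
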
3423static int cifs_readpage(struct file *file, struct page *page)
3424{
3425        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3426        int rc = -EACCES;
3427        unsigned int xid;
3428
3429        xid = get_xid();
3430
3431        if (file->private_data == NULL) {
3432                rc = -EBADF;
3433                free_xid(xid);
3434                return rc;
3435        }
3436
3437        cifs_dbg(FYI, "readpage %p at offset %lld 0x%llx\n",
3438                 page, offset, (unsigned long long)offset);
3439
3440        rc = cifs_readpage_worker(file, page, &offset);
3441
3442        unlock_page(page);
3443
3444        free_xid(xid);
3445        return rc;
3446}
3447
3448static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3449{
3450        struct cifsFileInfo *open_file;
3451
3452        spin_lock(&cifs_file_list_lock);
3453        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3454                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3455                        spin_unlock(&cifs_file_list_lock);
3456                        return 1;
3457                }
3458        }
3459        spin_unlock(&cifs_file_list_lock);
3460        return 0;
3461}
3462
3463/* We do not want to update the file size from the server for inodes
3464   open for write, to avoid races with writepage extending the file.
3465   In the future we could consider refreshing the inode only on
3466   increases in the file size, but this is tricky to do without
3467   racing with writebehind page caching in the current Linux kernel
3468   design. */
3469bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3470{
3471        if (!cifsInode)
3472                return true;
3473
3474        if (is_inode_writable(cifsInode)) {
3475                /* This inode is open for write at least once */
3476                struct cifs_sb_info *cifs_sb;
3477
3478                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3479                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3480                        /* since there is no page cache to corrupt with
3481                           direct I/O we can change the size safely */
3482                        return true;
3483                }
3484
3485                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3486                        return true;
3487
3488                return false;
3489        } else
3490                return true;
3491}
3492
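/*
 * Typical caller pattern for is_size_safe_to_change() (abridged from the
 * attribute-revalidation paths in inode.c): only let a server-reported
 * size override i_size when it cannot clobber dirty pagecache:
 *
 *	if (is_size_safe_to_change(cifsInode, end_of_file))
 *		i_size_write(inode, end_of_file);
 */
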
3493static int cifs_write_begin(struct file *file, struct address_space *mapping,
3494                        loff_t pos, unsigned len, unsigned flags,
3495                        struct page **pagep, void **fsdata)
3496{
3497        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3498        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3499        loff_t page_start = pos & PAGE_MASK;
3500        loff_t i_size;
3501        struct page *page;
3502        int rc = 0;
3503
3504        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3505
3506        page = grab_cache_page_write_begin(mapping, index, flags);
3507        if (!page) {
3508                rc = -ENOMEM;
3509                goto out;
3510        }
3511
3512        if (PageUptodate(page))
3513                goto out;
3514
3515        /*
3516         * If we write a full page it will be up to date, no need to read from
3517         * the server. If the write is short, we'll end up doing a sync write
3518         * instead.
3519         */
3520        if (len == PAGE_CACHE_SIZE)
3521                goto out;
3522
3523        /*
3524         * optimize away the read when we have an oplock, and we're not
3525         * expecting to use any of the data we'd be reading in. That
3526         * is, when the page lies beyond the EOF, or straddles the EOF
3527         * and the write will cover all of the existing data.
3528         */
3529        if (CIFS_I(mapping->host)->clientCanCacheRead) {
3530                i_size = i_size_read(mapping->host);
3531                if (page_start >= i_size ||
3532                    (offset == 0 && (pos + len) >= i_size)) {
3533                        zero_user_segments(page, 0, offset,
3534                                           offset + len,
3535                                           PAGE_CACHE_SIZE);
3536                        /*
3537                         * PageChecked means that the parts of the page
3538                         * to which we're not writing are considered up
3539                         * to date. Once the data is copied to the
3540                         * page, it can be set uptodate.
3541                         */
3542                        SetPageChecked(page);
3543                        goto out;
3544                }
3545        }
3546
3547        if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3548                /*
3549                 * might as well read a page, it is fast enough. If we get
3550                 * an error, we don't need to return it. cifs_write_end will
3551                 * do a sync write instead since PG_uptodate isn't set.
3552                 */
3553                cifs_readpage_worker(file, page, &page_start);
3554        } else {
3555                /* We could try using another file handle if there is
3556                   one, but how would we lock it to prevent a close of
3557                   that handle racing with this read? In any case the
3558                   page will be written out by write_end so this is fine */
3559        }
3560out:
3561        *pagep = page;
3562        return rc;
3563}
3564
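/*
 * Worked example for the oplock fast path in cifs_write_begin() (4K
 * pages): with i_size = 10000, a 100-byte write at pos = 16000 gives
 * page_start = 12288 >= i_size, so the page lies wholly beyond EOF.  The
 * unwritten parts are zeroed and SetPageChecked() records that no read
 * from the server is needed before the copy-in.
 */
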
3565static int cifs_release_page(struct page *page, gfp_t gfp)
3566{
3567        if (PagePrivate(page))
3568                return 0;
3569
3570        return cifs_fscache_release_page(page, gfp);
3571}
3572
3573static void cifs_invalidate_page(struct page *page, unsigned int offset,
3574                                 unsigned int length)
3575{
3576        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3577
3578        if (offset == 0 && length == PAGE_CACHE_SIZE)
3579                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3580}
3581
3582static int cifs_launder_page(struct page *page)
3583{
3584        int rc = 0;
3585        loff_t range_start = page_offset(page);
3586        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3587        struct writeback_control wbc = {
3588                .sync_mode = WB_SYNC_ALL,
3589                .nr_to_write = 0,
3590                .range_start = range_start,
3591                .range_end = range_end,
3592        };
3593
3594        cifs_dbg(FYI, "Launder page: %p\n", page);
3595
3596        if (clear_page_dirty_for_io(page))
3597                rc = cifs_writepage_locked(page, &wbc);
3598
3599        cifs_fscache_invalidate_page(page, page->mapping->host);
3600        return rc;
3601}
3602
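/*
 * The writeback_control in cifs_launder_page() targets exactly one page:
 * range_start/range_end bracket it and WB_SYNC_ALL makes the write
 * synchronous.  The same shape works for any one-page flush:
 *
 *	struct writeback_control wbc = {
 *		.sync_mode   = WB_SYNC_ALL,
 *		.nr_to_write = 0,
 *		.range_start = page_offset(page),
 *		.range_end   = page_offset(page) + PAGE_CACHE_SIZE - 1,
 *	};
 */
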
3603void cifs_oplock_break(struct work_struct *work)
3604{
3605        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3606                                                  oplock_break);
3607        struct inode *inode = cfile->dentry->d_inode;
3608        struct cifsInodeInfo *cinode = CIFS_I(inode);
3609        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3610        int rc = 0;
3611
3612        if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3613                                                cifs_has_mand_locks(cinode)) {
3614                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3615                         inode);
3616                cinode->clientCanCacheRead = false;
3617        }
3618
3619        if (inode && S_ISREG(inode->i_mode)) {
3620                if (cinode->clientCanCacheRead)
3621                        break_lease(inode, O_RDONLY);
3622                else
3623                        break_lease(inode, O_WRONLY);
3624                rc = filemap_fdatawrite(inode->i_mapping);
3625                if (cinode->clientCanCacheRead == 0) {
3626                        rc = filemap_fdatawait(inode->i_mapping);
3627                        mapping_set_error(inode->i_mapping, rc);
3628                        cifs_invalidate_mapping(inode);
3629                }
3630                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3631        }
3632
3633        rc = cifs_push_locks(cfile);
3634        if (rc)
3635                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3636
3637        /*
3638         * Releasing a stale oplock after a recent reconnect of the SMB
3639         * session, using a now-invalid file handle, is not a data integrity
3640         * issue; but don't bother sending an oplock release if the session
3641         * is still disconnected, since the server has already released it.
3642         */
3643        if (!cfile->oplock_break_cancelled) {
3644                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3645                                                             cinode);
3646                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3647        }
3648}
3649
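/*
 * Ordering in cifs_oplock_break() above matters: dirty data is flushed
 * first (filemap_fdatawrite), then, if read caching was lost, we wait
 * and invalidate (filemap_fdatawait + cifs_invalidate_mapping), push any
 * cached byte-range locks to the server, and only then acknowledge the
 * break via ->oplock_response().  Acknowledging earlier would let the
 * server grant conflicting opens while stale pages are still cached.
 */
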
3650const struct address_space_operations cifs_addr_ops = {
3651        .readpage = cifs_readpage,
3652        .readpages = cifs_readpages,
3653        .writepage = cifs_writepage,
3654        .writepages = cifs_writepages,
3655        .write_begin = cifs_write_begin,
3656        .write_end = cifs_write_end,
3657        .set_page_dirty = __set_page_dirty_nobuffers,
3658        .releasepage = cifs_release_page,
3659        .invalidatepage = cifs_invalidate_page,
3660        .launder_page = cifs_launder_page,
3661};
3662
3663/*
3664 * cifs_readpages requires the server to support a buffer large enough to
3665 * contain the header plus one complete page of data.  Otherwise, we need
3666 * to leave cifs_readpages out of the address space operations.
3667 */
3668const struct address_space_operations cifs_addr_ops_smallbuf = {
3669        .readpage = cifs_readpage,
3670        .writepage = cifs_writepage,
3671        .writepages = cifs_writepages,
3672        .write_begin = cifs_write_begin,
3673        .write_end = cifs_write_end,
3674        .set_page_dirty = __set_page_dirty_nobuffers,
3675        .releasepage = cifs_release_page,
3676        .invalidatepage = cifs_invalidate_page,
3677        .launder_page = cifs_launder_page,
3678};
3679