/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
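
/*
 * Illustrative sketch, not part of the original file: how a set of open(2)
 * flags travels through the two helpers above. The helper name is
 * hypothetical; e.g. O_WRONLY | O_CREAT | O_TRUNC yields FILE_OVERWRITE_IF
 * with GENERIC_WRITE access.
 */
static inline void cifs_dbg_open_mapping(unsigned int f_flags)
{
        cifs_dbg(FYI, "flags 0x%x -> disposition %d, access 0x%x\n",
                 f_flags, cifs_get_disposition(f_flags),
                 cifs_convert_flags(f_flags));
}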

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it with a new file the way
 *      FILE_SUPERSEDE does (which uses the attributes / metadata
 *      passed in on the open call).
 *
 *  O_SYNC is a reasonable match to the CIFS writethrough flag
 *  and the read write flags match reasonably.  O_LARGEFILE
 *  is irrelevant because largefile support is always used
 *  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *  O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if it's a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close, because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}
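
/*
 * Illustrative usage sketch, not part of the original file: a caller that
 * needs the filehandle to stay open beyond file->private_data takes its own
 * reference and drops it when done. The helper name is hypothetical.
 */
static inline void cifs_example_pin_cfile(struct cifsFileInfo *cfile)
{
        cifsFileInfo_get(cfile);        /* pin the open filehandle */
        /* ... cfile->fid and cfile->tlink remain valid here ... */
        cifsFileInfo_put(cfile);        /* may close the handle on the server */
}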

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry opening the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to push them */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_write(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here, because various ops (including
         * those that already hold the rename sem) can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here; we can never tell whether the caller already holds
         * the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we can not go to the server to get the new
         * inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}
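
/*
 * Illustrative sketch, not part of the original file: the common pattern of
 * allocating a byte-range lock record and attaching it to the open file's
 * lock list. The helper name is hypothetical.
 */
static inline int cifs_example_cache_lock(struct cifsFileInfo *cfile,
                                          __u64 offset, __u64 length,
                                          __u8 type)
{
        struct cifsLockInfo *lock = cifs_lock_init(offset, length, type);

        if (!lock)
                return -ENOMEM;
        cifs_lock_add(cfile, lock);
        return 0;
}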

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}


/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
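
/*
 * Worked example (illustrative; the buffer size is an assumption): if the
 * server negotiated max_buf = 16384 bytes, max_num above comes out to about
 * (16384 - sizeof(struct smb_hdr)) / sizeof(LOCKING_ANDX_RANGE) ranges, so
 * a long lock list is flushed to the server in chunks of at most max_num
 * entries per LOCKING_ANDX request.
 */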

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
                                        rc = stored_rc;
                                } else
1421                                        /*
1422                                 * The unlock range request succeeded -
1423                                         * free the tmp list.
1424                                         */
1425                                        cifs_free_llist(&tmp_llist);
1426                                cur = buf;
1427                                num = 0;
1428                        } else
1429                                cur++;
1430                }
1431                if (num) {
1432                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1433                                               types[i], num, 0, buf);
1434                        if (stored_rc) {
1435                                cifs_move_llist(&tmp_llist,
1436                                                &cfile->llist->locks);
1437                                rc = stored_rc;
1438                        } else
1439                                cifs_free_llist(&tmp_llist);
1440                }
1441        }
1442
1443        up_write(&cinode->lock_sem);
1444        kfree(buf);
1445        return rc;
1446}
1447
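    /*
     * Set or clear a byte-range lock. POSIX lock requests are handled
     * locally and then sent to the server via CIFSSMBPosixLock; otherwise
     * we fall back to mandatory (Windows-style) locking, caching the lock
     * locally when the oplock state allows it.
     */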
1448static int
1449cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1450           bool wait_flag, bool posix_lck, int lock, int unlock,
1451           unsigned int xid)
1452{
1453        int rc = 0;
1454        __u64 length = 1 + flock->fl_end - flock->fl_start;
1455        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1456        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1457        struct TCP_Server_Info *server = tcon->ses->server;
1458        struct inode *inode = cfile->dentry->d_inode;
1459
1460        if (posix_lck) {
1461                int posix_lock_type;
1462
1463                rc = cifs_posix_lock_set(file, flock);
1464                if (rc <= 0)
1465                        return rc;
1466
1467                if (type & server->vals->shared_lock_type)
1468                        posix_lock_type = CIFS_RDLCK;
1469                else
1470                        posix_lock_type = CIFS_WRLCK;
1471
1472                if (unlock == 1)
1473                        posix_lock_type = CIFS_UNLCK;
1474
1475                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1476                                      current->tgid, flock->fl_start, length,
1477                                      NULL, posix_lock_type, wait_flag);
1478                goto out;
1479        }
1480
1481        if (lock) {
1482                struct cifsLockInfo *lock;
1483
1484                lock = cifs_lock_init(flock->fl_start, length, type);
1485                if (!lock)
1486                        return -ENOMEM;
1487
1488                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1489                if (rc < 0) {
1490                        kfree(lock);
1491                        return rc;
1492                }
1493                if (!rc)
1494                        goto out;
1495
1496                /*
1497                 * A Windows 7 server can delay breaking a lease from read to
1498                 * None when we set a byte-range lock on a file - break it
1499                 * explicitly before sending the lock to the server to be sure
1500                 * the next read won't conflict with non-overlapping locks due
1501                 * to page reading.
1502                 */
1503                if (!CIFS_I(inode)->clientCanCacheAll &&
1504                                        CIFS_I(inode)->clientCanCacheRead) {
1505                        cifs_invalidate_mapping(inode);
1506                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1507                                 inode);
1508                        CIFS_I(inode)->clientCanCacheRead = false;
1509                }
1510
1511                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1512                                            type, 1, 0, wait_flag);
1513                if (rc) {
1514                        kfree(lock);
1515                        return rc;
1516                }
1517
1518                cifs_lock_add(cfile, lock);
1519        } else if (unlock)
1520                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1521
1522out:
1523        if (flock->fl_flags & FL_POSIX)
1524                posix_lock_file_wait(file, flock);
1525        return rc;
1526}
1527
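    /*
     * Entry point for the ->lock file operation: decode the flock request
     * into lock/unlock/wait semantics, decide between POSIX and mandatory
     * lock styles, and dispatch to cifs_getlk or cifs_setlk.
     */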
1528int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1529{
1530        int rc, xid;
1531        int lock = 0, unlock = 0;
1532        bool wait_flag = false;
1533        bool posix_lck = false;
1534        struct cifs_sb_info *cifs_sb;
1535        struct cifs_tcon *tcon;
1536        struct cifsInodeInfo *cinode;
1537        struct cifsFileInfo *cfile;
1538        __u16 netfid;
1539        __u32 type;
1540
1541        rc = -EACCES;
1542        xid = get_xid();
1543
1544        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1545                 cmd, flock->fl_flags, flock->fl_type,
1546                 flock->fl_start, flock->fl_end);
1547
1548        cfile = (struct cifsFileInfo *)file->private_data;
1549        tcon = tlink_tcon(cfile->tlink);
1550
1551        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1552                        tcon->ses->server);
1553
1554        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1555        netfid = cfile->fid.netfid;
1556        cinode = CIFS_I(file_inode(file));
1557
1558        if (cap_unix(tcon->ses) &&
1559            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1560            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1561                posix_lck = true;
1562        /*
1563         * BB add code here to normalize offset and length to account for
1564         * negative length which we can not accept over the wire.
1565         */
1566        if (IS_GETLK(cmd)) {
1567                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1568                free_xid(xid);
1569                return rc;
1570        }
1571
1572        if (!lock && !unlock) {
1573                /*
1574                 * if this is neither a lock nor an unlock request, there is
1575                 * nothing to do since we do not know what was requested
1576                 */
1577                free_xid(xid);
1578                return -EOPNOTSUPP;
1579        }
1580
1581        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1582                        xid);
1583        free_xid(xid);
1584        return rc;
1585}
1586
1587/*
1588 * update the file size (if needed) after a write. Should be called with
1589 * the inode->i_lock held
1590 */
1591void
1592cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1593                      unsigned int bytes_written)
1594{
1595        loff_t end_of_write = offset + bytes_written;
1596
1597        if (end_of_write > cifsi->server_eof)
1598                cifsi->server_eof = end_of_write;
1599}
1600
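    /*
     * Write data to the server synchronously at *offset, in chunks of at
     * most wsize bytes, reopening an invalidated handle and retrying on
     * -EAGAIN. Updates the cached server EOF and i_size as data is
     * accepted and returns the number of bytes written or an error code.
     */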
1601static ssize_t
1602cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1603           size_t write_size, loff_t *offset)
1604{
1605        int rc = 0;
1606        unsigned int bytes_written = 0;
1607        unsigned int total_written;
1608        struct cifs_sb_info *cifs_sb;
1609        struct cifs_tcon *tcon;
1610        struct TCP_Server_Info *server;
1611        unsigned int xid;
1612        struct dentry *dentry = open_file->dentry;
1613        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1614        struct cifs_io_parms io_parms;
1615
1616        cifs_sb = CIFS_SB(dentry->d_sb);
1617
1618        cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1619                 write_size, *offset, dentry->d_name.name);
1620
1621        tcon = tlink_tcon(open_file->tlink);
1622        server = tcon->ses->server;
1623
1624        if (!server->ops->sync_write)
1625                return -ENOSYS;
1626
1627        xid = get_xid();
1628
1629        for (total_written = 0; write_size > total_written;
1630             total_written += bytes_written) {
1631                rc = -EAGAIN;
1632                while (rc == -EAGAIN) {
1633                        struct kvec iov[2];
1634                        unsigned int len;
1635
1636                        if (open_file->invalidHandle) {
1637                                /* we could deadlock if we called
1638                                   filemap_fdatawait from here so tell
1639                                   reopen_file not to flush data to the
1640                                   server now */
1641                                rc = cifs_reopen_file(open_file, false);
1642                                if (rc != 0)
1643                                        break;
1644                        }
1645
1646                        len = min((size_t)cifs_sb->wsize,
1647                                  write_size - total_written);
1648                        /* iov[0] is reserved for smb header */
1649                        iov[1].iov_base = (char *)write_data + total_written;
1650                        iov[1].iov_len = len;
1651                        io_parms.pid = pid;
1652                        io_parms.tcon = tcon;
1653                        io_parms.offset = *offset;
1654                        io_parms.length = len;
1655                        rc = server->ops->sync_write(xid, open_file, &io_parms,
1656                                                     &bytes_written, iov, 1);
1657                }
1658                if (rc || (bytes_written == 0)) {
1659                        if (total_written)
1660                                break;
1661                        else {
1662                                free_xid(xid);
1663                                return rc;
1664                        }
1665                } else {
1666                        spin_lock(&dentry->d_inode->i_lock);
1667                        cifs_update_eof(cifsi, *offset, bytes_written);
1668                        spin_unlock(&dentry->d_inode->i_lock);
1669                        *offset += bytes_written;
1670                }
1671        }
1672
1673        cifs_stats_bytes_written(tcon, total_written);
1674
1675        if (total_written > 0) {
1676                spin_lock(&dentry->d_inode->i_lock);
1677                if (*offset > dentry->d_inode->i_size)
1678                        i_size_write(dentry->d_inode, *offset);
1679                spin_unlock(&dentry->d_inode->i_lock);
1680        }
1681        mark_inode_dirty_sync(dentry->d_inode);
1682        free_xid(xid);
1683        return total_written;
1684}
1685
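    /*
     * Find an open handle with read access whose network handle is still
     * valid, optionally filtering by fsuid; a reference is taken on the
     * handle that is returned.
     */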
1686struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1687                                        bool fsuid_only)
1688{
1689        struct cifsFileInfo *open_file = NULL;
1690        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1691
1692        /* only filter by fsuid on multiuser mounts */
1693        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1694                fsuid_only = false;
1695
1696        spin_lock(&cifs_file_list_lock);
1697        /* we could simply return the first list entry since write-only
1698           entries are always at the end of the list, but the first entry
1699           might have a close pending, so we go through the whole list */
1700        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1701                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1702                        continue;
1703                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1704                        if (!open_file->invalidHandle) {
1705                                /* found a good file */
1706                                /* lock it so it will not be closed on us */
1707                                cifsFileInfo_get_locked(open_file);
1708                                spin_unlock(&cifs_file_list_lock);
1709                                return open_file;
1710                        } /* else might as well continue, and look for
1711                             another, or simply have the caller reopen it
1712                             again rather than trying to fix this handle */
1713                } else /* write only file */
1714                        break; /* write only files are last so must be done */
1715        }
1716        spin_unlock(&cifs_file_list_lock);
1717        return NULL;
1718}
1719
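    /*
     * Find a writable handle for this inode, preferring one opened by the
     * calling process. If only an invalidated handle is available, try to
     * reopen it, giving up after MAX_REOPEN_ATT attempts. A reference is
     * taken on the handle that is returned.
     */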
1720struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1721                                        bool fsuid_only)
1722{
1723        struct cifsFileInfo *open_file, *inv_file = NULL;
1724        struct cifs_sb_info *cifs_sb;
1725        bool any_available = false;
1726        int rc;
1727        unsigned int refind = 0;
1728
1729        /* Having a null inode here (because mapping->host was set to zero by
1730        the VFS or MM) should not happen, but we had reports of an oops (due
1731        to it being zero) during stress test cases, so we need to check for it */
1732
1733        if (cifs_inode == NULL) {
1734                cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1735                dump_stack();
1736                return NULL;
1737        }
1738
1739        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1740
1741        /* only filter by fsuid on multiuser mounts */
1742        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1743                fsuid_only = false;
1744
1745        spin_lock(&cifs_file_list_lock);
1746refind_writable:
1747        if (refind > MAX_REOPEN_ATT) {
1748                spin_unlock(&cifs_file_list_lock);
1749                return NULL;
1750        }
1751        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1752                if (!any_available && open_file->pid != current->tgid)
1753                        continue;
1754                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1755                        continue;
1756                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1757                        if (!open_file->invalidHandle) {
1758                                /* found a good writable file */
1759                                cifsFileInfo_get_locked(open_file);
1760                                spin_unlock(&cifs_file_list_lock);
1761                                return open_file;
1762                        } else {
1763                                if (!inv_file)
1764                                        inv_file = open_file;
1765                        }
1766                }
1767        }
1768        /* couldn't find a usable FH with the same pid, try any available */
1769        if (!any_available) {
1770                any_available = true;
1771                goto refind_writable;
1772        }
1773
1774        if (inv_file) {
1775                any_available = false;
1776                cifsFileInfo_get_locked(inv_file);
1777        }
1778
1779        spin_unlock(&cifs_file_list_lock);
1780
1781        if (inv_file) {
1782                rc = cifs_reopen_file(inv_file, false);
1783                if (!rc)
1784                        return inv_file;
1785                else {
1786                        spin_lock(&cifs_file_list_lock);
1787                        list_move_tail(&inv_file->flist,
1788                                        &cifs_inode->openFileList);
1789                        spin_unlock(&cifs_file_list_lock);
1790                        cifsFileInfo_put(inv_file);
1791                        spin_lock(&cifs_file_list_lock);
1792                        ++refind;
1793                        goto refind_writable;
1794                }
1795        }
1796
1797        return NULL;
1798}
1799
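    /*
     * Synchronously write the [from, to) byte range of a page cache page
     * to the server using any writable handle for the inode.
     */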
1800static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1801{
1802        struct address_space *mapping = page->mapping;
1803        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1804        char *write_data;
1805        int rc = -EFAULT;
1806        int bytes_written = 0;
1807        struct inode *inode;
1808        struct cifsFileInfo *open_file;
1809
1810        if (!mapping || !mapping->host)
1811                return -EFAULT;
1812
1813        inode = page->mapping->host;
1814
1815        offset += (loff_t)from;
1816        write_data = kmap(page);
1817        write_data += from;
1818
1819        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1820                kunmap(page);
1821                return -EIO;
1822        }
1823
1824        /* racing with truncate? */
1825        if (offset > mapping->host->i_size) {
1826                kunmap(page);
1827                return 0; /* don't care */
1828        }
1829
1830        /* check to make sure that we are not extending the file */
1831        if (mapping->host->i_size - offset < (loff_t)to)
1832                to = (unsigned)(mapping->host->i_size - offset);
1833
1834        open_file = find_writable_file(CIFS_I(mapping->host), false);
1835        if (open_file) {
1836                bytes_written = cifs_write(open_file, open_file->pid,
1837                                           write_data, to - from, &offset);
1838                cifsFileInfo_put(open_file);
1839                /* Does mm or vfs already set times? */
1840                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1841                if ((bytes_written > 0) && (offset))
1842                        rc = 0;
1843                else if (bytes_written < 0)
1844                        rc = bytes_written;
1845        } else {
1846                cifs_dbg(FYI, "No writable filehandles for inode\n");
1847                rc = -EIO;
1848        }
1849
1850        kunmap(page);
1851        return rc;
1852}
1853
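    /*
     * Write back dirty pages in batches of up to wsize bytes: gather runs
     * of consecutive dirty pages, wrap each run in a cifs_writedata and
     * send it to the server as a single asynchronous write request.
     */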
1854static int cifs_writepages(struct address_space *mapping,
1855                           struct writeback_control *wbc)
1856{
1857        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1858        bool done = false, scanned = false, range_whole = false;
1859        pgoff_t end, index;
1860        struct cifs_writedata *wdata;
1861        struct TCP_Server_Info *server;
1862        struct page *page;
1863        int rc = 0;
1864
1865        /*
1866         * If wsize is smaller than the page cache size, default to writing
1867         * one page at a time via cifs_writepage
1868         */
1869        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1870                return generic_writepages(mapping, wbc);
1871
1872        if (wbc->range_cyclic) {
1873                index = mapping->writeback_index; /* Start from prev offset */
1874                end = -1;
1875        } else {
1876                index = wbc->range_start >> PAGE_CACHE_SHIFT;
1877                end = wbc->range_end >> PAGE_CACHE_SHIFT;
1878                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1879                        range_whole = true;
1880                scanned = true;
1881        }
1882retry:
1883        while (!done && index <= end) {
1884                unsigned int i, nr_pages, found_pages;
1885                pgoff_t next = 0, tofind;
1886                struct page **pages;
1887
1888                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1889                                end - index) + 1;
1890
1891                wdata = cifs_writedata_alloc((unsigned int)tofind,
1892                                             cifs_writev_complete);
1893                if (!wdata) {
1894                        rc = -ENOMEM;
1895                        break;
1896                }
1897
1898                /*
1899                 * find_get_pages_tag seems to return a max of 256 on each
1900                 * iteration, so we must call it several times in order to
1901                 * fill the array, otherwise the wsize is effectively
1902                 * limited to 256 * PAGE_CACHE_SIZE.
1903                 */
1904                found_pages = 0;
1905                pages = wdata->pages;
1906                do {
1907                        nr_pages = find_get_pages_tag(mapping, &index,
1908                                                        PAGECACHE_TAG_DIRTY,
1909                                                        tofind, pages);
1910                        found_pages += nr_pages;
1911                        tofind -= nr_pages;
1912                        pages += nr_pages;
1913                } while (nr_pages && tofind && index <= end);
1914
1915                if (found_pages == 0) {
1916                        kref_put(&wdata->refcount, cifs_writedata_release);
1917                        break;
1918                }
1919
1920                nr_pages = 0;
1921                for (i = 0; i < found_pages; i++) {
1922                        page = wdata->pages[i];
1923                        /*
1924                         * At this point we hold neither mapping->tree_lock nor
1925                         * lock on the page itself: the page may be truncated or
1926                         * invalidated (changing page->mapping to NULL), or even
1927                         * swizzled back from swapper_space to tmpfs file
1928                         * mapping
1929                         */
1930
1931                        if (nr_pages == 0)
1932                                lock_page(page);
1933                        else if (!trylock_page(page))
1934                                break;
1935
1936                        if (unlikely(page->mapping != mapping)) {
1937                                unlock_page(page);
1938                                break;
1939                        }
1940
1941                        if (!wbc->range_cyclic && page->index > end) {
1942                                done = true;
1943                                unlock_page(page);
1944                                break;
1945                        }
1946
1947                        if (next && (page->index != next)) {
1948                                /* Not next consecutive page */
1949                                unlock_page(page);
1950                                break;
1951                        }
1952
1953                        if (wbc->sync_mode != WB_SYNC_NONE)
1954                                wait_on_page_writeback(page);
1955
1956                        if (PageWriteback(page) ||
1957                                        !clear_page_dirty_for_io(page)) {
1958                                unlock_page(page);
1959                                break;
1960                        }
1961
1962                        /*
1963                         * This actually clears the dirty bit in the radix tree.
1964                         * See cifs_writepage() for more commentary.
1965                         */
1966                        set_page_writeback(page);
1967
1968                        if (page_offset(page) >= i_size_read(mapping->host)) {
1969                                done = true;
1970                                unlock_page(page);
1971                                end_page_writeback(page);
1972                                break;
1973                        }
1974
1975                        wdata->pages[i] = page;
1976                        next = page->index + 1;
1977                        ++nr_pages;
1978                }
1979
1980                /* reset index to refind any pages skipped */
1981                if (nr_pages == 0)
1982                        index = wdata->pages[0]->index + 1;
1983
1984                /* put any pages we aren't going to use */
1985                for (i = nr_pages; i < found_pages; i++) {
1986                        page_cache_release(wdata->pages[i]);
1987                        wdata->pages[i] = NULL;
1988                }
1989
1990                /* nothing to write? */
1991                if (nr_pages == 0) {
1992                        kref_put(&wdata->refcount, cifs_writedata_release);
1993                        continue;
1994                }
1995
1996                wdata->sync_mode = wbc->sync_mode;
1997                wdata->nr_pages = nr_pages;
1998                wdata->offset = page_offset(wdata->pages[0]);
1999                wdata->pagesz = PAGE_CACHE_SIZE;
2000                wdata->tailsz =
2001                        min(i_size_read(mapping->host) -
2002                            page_offset(wdata->pages[nr_pages - 1]),
2003                            (loff_t)PAGE_CACHE_SIZE);
2004                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2005                                        wdata->tailsz;
2006
2007                do {
2008                        if (wdata->cfile != NULL)
2009                                cifsFileInfo_put(wdata->cfile);
2010                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2011                                                          false);
2012                        if (!wdata->cfile) {
2013                                cifs_dbg(VFS, "No writable handles for inode\n");
2014                                rc = -EBADF;
2015                                break;
2016                        }
2017                        wdata->pid = wdata->cfile->pid;
2018                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2019                        rc = server->ops->async_writev(wdata);
2020                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2021
2022                for (i = 0; i < nr_pages; ++i)
2023                        unlock_page(wdata->pages[i]);
2024
2025                /* send failure -- clean up the mess */
2026                if (rc != 0) {
2027                        for (i = 0; i < nr_pages; ++i) {
2028                                if (rc == -EAGAIN)
2029                                        redirty_page_for_writepage(wbc,
2030                                                           wdata->pages[i]);
2031                                else
2032                                        SetPageError(wdata->pages[i]);
2033                                end_page_writeback(wdata->pages[i]);
2034                                page_cache_release(wdata->pages[i]);
2035                        }
2036                        if (rc != -EAGAIN)
2037                                mapping_set_error(mapping, rc);
2038                }
2039                kref_put(&wdata->refcount, cifs_writedata_release);
2040
2041                wbc->nr_to_write -= nr_pages;
2042                if (wbc->nr_to_write <= 0)
2043                        done = true;
2044
2045                index = next;
2046        }
2047
2048        if (!scanned && !done) {
2049                /*
2050                 * We hit the last page and there is more work to be done: wrap
2051                 * back to the start of the file
2052                 */
2053                scanned = true;
2054                index = 0;
2055                goto retry;
2056        }
2057
2058        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2059                mapping->writeback_index = index;
2060
2061        return rc;
2062}
2063
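    /*
     * Write a single locked page to the server, retrying on -EAGAIN when
     * the caller asked for data integrity (WB_SYNC_ALL).
     */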
2064static int
2065cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2066{
2067        int rc;
2068        unsigned int xid;
2069
2070        xid = get_xid();
2071/* BB add check for wbc flags */
2072        page_cache_get(page);
2073        if (!PageUptodate(page))
2074                cifs_dbg(FYI, "ppw - page not up to date\n");
2075
2076        /*
2077         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2078         *
2079         * A writepage() implementation always needs to do either this,
2080         * or re-dirty the page with "redirty_page_for_writepage()" in
2081         * the case of a failure.
2082         *
2083         * Just unlocking the page will cause the radix tree tag-bits
2084         * to fail to update with the state of the page correctly.
2085         */
2086        set_page_writeback(page);
2087retry_write:
2088        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2089        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2090                goto retry_write;
2091        else if (rc == -EAGAIN)
2092                redirty_page_for_writepage(wbc, page);
2093        else if (rc != 0)
2094                SetPageError(page);
2095        else
2096                SetPageUptodate(page);
2097        end_page_writeback(page);
2098        page_cache_release(page);
2099        free_xid(xid);
2100        return rc;
2101}
2102
2103static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2104{
2105        int rc = cifs_writepage_locked(page, wbc);
2106        unlock_page(page);
2107        return rc;
2108}
2109
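    /*
     * Called once the generic write path has copied data into the page:
     * mark the page up to date and dirty, or, if the page is still not up
     * to date, write the copied range synchronously to the server.
     */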
2110static int cifs_write_end(struct file *file, struct address_space *mapping,
2111                        loff_t pos, unsigned len, unsigned copied,
2112                        struct page *page, void *fsdata)
2113{
2114        int rc;
2115        struct inode *inode = mapping->host;
2116        struct cifsFileInfo *cfile = file->private_data;
2117        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2118        __u32 pid;
2119
2120        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2121                pid = cfile->pid;
2122        else
2123                pid = current->tgid;
2124
2125        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2126                 page, pos, copied);
2127
2128        if (PageChecked(page)) {
2129                if (copied == len)
2130                        SetPageUptodate(page);
2131                ClearPageChecked(page);
2132        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2133                SetPageUptodate(page);
2134
2135        if (!PageUptodate(page)) {
2136                char *page_data;
2137                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2138                unsigned int xid;
2139
2140                xid = get_xid();
2141                /* this is probably better than directly calling
2142                   partialpage_write since in this function the file handle
2143                   is known, which we might as well leverage */
2144                /* BB check if anything else is missing out of ppw,
2145                   such as updating the last write time */
2146                page_data = kmap(page);
2147                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2148                /* if (rc < 0) should we set writebehind rc? */
2149                kunmap(page);
2150
2151                free_xid(xid);
2152        } else {
2153                rc = copied;
2154                pos += copied;
2155                set_page_dirty(page);
2156        }
2157
2158        if (rc > 0) {
2159                spin_lock(&inode->i_lock);
2160                if (pos > inode->i_size)
2161                        i_size_write(inode, pos);
2162                spin_unlock(&inode->i_lock);
2163        }
2164
2165        unlock_page(page);
2166        page_cache_release(page);
2167
2168        return rc;
2169}
2170
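    /*
     * Strict cache mode fsync: flush dirty pages, invalidate the mapping
     * if we do not hold a read oplock, and ask the server to flush the
     * file to disk.
     */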
2171int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2172                      int datasync)
2173{
2174        unsigned int xid;
2175        int rc = 0;
2176        struct cifs_tcon *tcon;
2177        struct TCP_Server_Info *server;
2178        struct cifsFileInfo *smbfile = file->private_data;
2179        struct inode *inode = file_inode(file);
2180        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2181
2182        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2183        if (rc)
2184                return rc;
2185        mutex_lock(&inode->i_mutex);
2186
2187        xid = get_xid();
2188
2189        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2190                 file->f_path.dentry->d_name.name, datasync);
2191
2192        if (!CIFS_I(inode)->clientCanCacheRead) {
2193                rc = cifs_invalidate_mapping(inode);
2194                if (rc) {
2195                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2196                        rc = 0; /* don't care about it in fsync */
2197                }
2198        }
2199
2200        tcon = tlink_tcon(smbfile->tlink);
2201        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2202                server = tcon->ses->server;
2203                if (server->ops->flush)
2204                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2205                else
2206                        rc = -ENOSYS;
2207        }
2208
2209        free_xid(xid);
2210        mutex_unlock(&inode->i_mutex);
2211        return rc;
2212}
2213
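    /*
     * fsync for the non-strict case: flush dirty pages and ask the server
     * to flush the file, leaving the page cache intact.
     */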
2214int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2215{
2216        unsigned int xid;
2217        int rc = 0;
2218        struct cifs_tcon *tcon;
2219        struct TCP_Server_Info *server;
2220        struct cifsFileInfo *smbfile = file->private_data;
2221        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2222        struct inode *inode = file->f_mapping->host;
2223
2224        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2225        if (rc)
2226                return rc;
2227        mutex_lock(&inode->i_mutex);
2228
2229        xid = get_xid();
2230
2231        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2232                 file->f_path.dentry->d_name.name, datasync);
2233
2234        tcon = tlink_tcon(smbfile->tlink);
2235        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2236                server = tcon->ses->server;
2237                if (server->ops->flush)
2238                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2239                else
2240                        rc = -ENOSYS;
2241        }
2242
2243        free_xid(xid);
2244        mutex_unlock(&inode->i_mutex);
2245        return rc;
2246}
2247
2248/*
2249 * As file closes, flush all cached write data for this inode checking
2250 * for write behind errors.
2251 */
2252int cifs_flush(struct file *file, fl_owner_t id)
2253{
2254        struct inode *inode = file_inode(file);
2255        int rc = 0;
2256
2257        if (file->f_mode & FMODE_WRITE)
2258                rc = filemap_write_and_wait(inode->i_mapping);
2259
2260        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2261
2262        return rc;
2263}
2264
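    /*
     * Allocate num_pages pages for an uncached write, releasing whatever
     * was already allocated if we run out of memory part way through.
     */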
2265static int
2266cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2267{
2268        int rc = 0;
2269        unsigned long i;
2270
2271        for (i = 0; i < num_pages; i++) {
2272                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2273                if (!pages[i]) {
2274                        /*
2275                         * save the number of pages we have already
2276                         * allocated and return with an ENOMEM error
2277                         */
2278                        num_pages = i;
2279                        rc = -ENOMEM;
2280                        break;
2281                }
2282        }
2283
2284        if (rc) {
2285                for (i = 0; i < num_pages; i++)
2286                        put_page(pages[i]);
2287        }
2288        return rc;
2289}
2290
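    /*
     * Return the number of pages needed to hold min(len, wsize) bytes and
     * optionally report that byte count back through *cur_len.
     */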
2291static inline
2292size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2293{
2294        size_t num_pages;
2295        size_t clen;
2296
2297        clen = min_t(const size_t, len, wsize);
2298        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2299
2300        if (cur_len)
2301                *cur_len = clen;
2302
2303        return num_pages;
2304}
2305
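    /*
     * Completion handler for uncached writes: update the cached server EOF
     * and i_size, wake up the waiter, and release the pages unless the
     * write is going to be retried.
     */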
2306static void
2307cifs_uncached_writev_complete(struct work_struct *work)
2308{
2309        int i;
2310        struct cifs_writedata *wdata = container_of(work,
2311                                        struct cifs_writedata, work);
2312        struct inode *inode = wdata->cfile->dentry->d_inode;
2313        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2314
2315        spin_lock(&inode->i_lock);
2316        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2317        if (cifsi->server_eof > inode->i_size)
2318                i_size_write(inode, cifsi->server_eof);
2319        spin_unlock(&inode->i_lock);
2320
2321        complete(&wdata->done);
2322
2323        if (wdata->result != -EAGAIN) {
2324                for (i = 0; i < wdata->nr_pages; i++)
2325                        put_page(wdata->pages[i]);
2326        }
2327
2328        kref_put(&wdata->refcount, cifs_writedata_release);
2329}
2330
2331/* attempt to send write to server, retry on any -EAGAIN errors */
2332static int
2333cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2334{
2335        int rc;
2336        struct TCP_Server_Info *server;
2337
2338        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2339
2340        do {
2341                if (wdata->cfile->invalidHandle) {
2342                        rc = cifs_reopen_file(wdata->cfile, false);
2343                        if (rc != 0)
2344                                continue;
2345                }
2346                rc = server->ops->async_writev(wdata);
2347        } while (rc == -EAGAIN);
2348
2349        return rc;
2350}
2351
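    /*
     * Uncached write path: copy the user data into freshly allocated
     * pages, send asynchronous write requests of up to wsize bytes each,
     * then wait for the replies in order of increasing offset.
     */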
2352static ssize_t
2353cifs_iovec_write(struct file *file, const struct iovec *iov,
2354                 unsigned long nr_segs, loff_t *poffset)
2355{
2356        unsigned long nr_pages, i;
2357        size_t copied, len, cur_len;
2358        ssize_t total_written = 0;
2359        loff_t offset;
2360        struct iov_iter it;
2361        struct cifsFileInfo *open_file;
2362        struct cifs_tcon *tcon;
2363        struct cifs_sb_info *cifs_sb;
2364        struct cifs_writedata *wdata, *tmp;
2365        struct list_head wdata_list;
2366        int rc;
2367        pid_t pid;
2368
2369        len = iov_length(iov, nr_segs);
2370        if (!len)
2371                return 0;
2372
2373        rc = generic_write_checks(file, poffset, &len, 0);
2374        if (rc)
2375                return rc;
2376
2377        INIT_LIST_HEAD(&wdata_list);
2378        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2379        open_file = file->private_data;
2380        tcon = tlink_tcon(open_file->tlink);
2381
2382        if (!tcon->ses->server->ops->async_writev)
2383                return -ENOSYS;
2384
2385        offset = *poffset;
2386
2387        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2388                pid = open_file->pid;
2389        else
2390                pid = current->tgid;
2391
2392        iov_iter_init(&it, iov, nr_segs, len, 0);
2393        do {
2394                size_t save_len;
2395
2396                nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2397                wdata = cifs_writedata_alloc(nr_pages,
2398                                             cifs_uncached_writev_complete);
2399                if (!wdata) {
2400                        rc = -ENOMEM;
2401                        break;
2402                }
2403
2404                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2405                if (rc) {
2406                        kfree(wdata);
2407                        break;
2408                }
2409
2410                save_len = cur_len;
2411                for (i = 0; i < nr_pages; i++) {
2412                        copied = min_t(const size_t, cur_len, PAGE_SIZE);
2413                        copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2414                                                         0, copied);
2415                        cur_len -= copied;
2416                        iov_iter_advance(&it, copied);
2417                }
2418                cur_len = save_len - cur_len;
2419
2420                wdata->sync_mode = WB_SYNC_ALL;
2421                wdata->nr_pages = nr_pages;
2422                wdata->offset = (__u64)offset;
2423                wdata->cfile = cifsFileInfo_get(open_file);
2424                wdata->pid = pid;
2425                wdata->bytes = cur_len;
2426                wdata->pagesz = PAGE_SIZE;
2427                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2428                rc = cifs_uncached_retry_writev(wdata);
2429                if (rc) {
2430                        kref_put(&wdata->refcount, cifs_writedata_release);
2431                        break;
2432                }
2433
2434                list_add_tail(&wdata->list, &wdata_list);
2435                offset += cur_len;
2436                len -= cur_len;
2437        } while (len > 0);
2438
2439        /*
2440         * If at least one write was successfully sent, then discard any rc
2441         * value from the failed sends. If the remaining writes succeed, then
2442         * we'll end up returning whatever was written. If one fails, then
2443         * we'll get a new rc value from that.
2444         */
2445        if (!list_empty(&wdata_list))
2446                rc = 0;
2447
2448        /*
2449         * Wait for and collect replies for any successful sends in order of
2450         * increasing offset. Once an error is hit or we get a fatal signal
2451         * while waiting, then return without waiting for any more replies.
2452         */
2453restart_loop:
2454        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2455                if (!rc) {
2456                        /* FIXME: freezable too? */
2457                        rc = wait_for_completion_killable(&wdata->done);
2458                        if (rc)
2459                                rc = -EINTR;
2460                        else if (wdata->result)
2461                                rc = wdata->result;
2462                        else
2463                                total_written += wdata->bytes;
2464
2465                        /* resend call if it's a retryable error */
2466                        if (rc == -EAGAIN) {
2467                                rc = cifs_uncached_retry_writev(wdata);
2468                                goto restart_loop;
2469                        }
2470                }
2471                list_del_init(&wdata->list);
2472                kref_put(&wdata->refcount, cifs_writedata_release);
2473        }
2474
2475        if (total_written > 0)
2476                *poffset += total_written;
2477
2478        cifs_stats_bytes_written(tcon, total_written);
2479        return total_written ? total_written : (ssize_t)rc;
2480}
2481
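    /*
     * Uncached write entry point: send the data via cifs_iovec_write and,
     * on success, mark any cached pages invalid since they are now stale.
     */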
2482ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2483                                unsigned long nr_segs, loff_t pos)
2484{
2485        ssize_t written;
2486        struct inode *inode;
2487
2488        inode = file_inode(iocb->ki_filp);
2489
2490        /*
2491         * BB - optimize this path for the case when signing is disabled. We
2492         * can drop this extra memory-to-memory copying and use the iovec
2493         * buffers directly for constructing the write request.
2494         */
2495
2496        written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2497        if (written > 0) {
2498                CIFS_I(inode)->invalid_mapping = true;
2499                iocb->ki_pos = pos;
2500        }
2501
2502        return written;
2503}
2504
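    /*
     * Write through the page cache, but only after checking under
     * lock_sem that no cached byte-range lock conflicts with the region
     * being written.
     */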
2505static ssize_t
2506cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2507            unsigned long nr_segs, loff_t pos)
2508{
2509        struct file *file = iocb->ki_filp;
2510        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2511        struct inode *inode = file->f_mapping->host;
2512        struct cifsInodeInfo *cinode = CIFS_I(inode);
2513        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2514        ssize_t rc = -EACCES;
2515
2516        BUG_ON(iocb->ki_pos != pos);
2517
2518        /*
2519         * We need to hold the sem to be sure nobody modifies the lock list
2520         * with a brlock that prevents writing.
2521         */
2522        down_read(&cinode->lock_sem);
2523        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2524                                     server->vals->exclusive_lock_type, NULL,
2525                                     CIFS_WRITE_OP)) {
2526                mutex_lock(&inode->i_mutex);
2527                rc = __generic_file_aio_write(iocb, iov, nr_segs,
2528                                               &iocb->ki_pos);
2529                mutex_unlock(&inode->i_mutex);
2530        }
2531
2532        if (rc > 0 || rc == -EIOCBQUEUED) {
2533                ssize_t err;
2534
2535                err = generic_write_sync(file, pos, rc);
2536                if (err < 0 && rc > 0)
2537                        rc = err;
2538        }
2539
2540        up_read(&cinode->lock_sem);
2541        return rc;
2542}
2543
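    /*
     * Strict cache mode write: write through the page cache only when we
     * hold an exclusive oplock; otherwise send the data straight to the
     * server and drop any cached read data the write has made stale.
     */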
2544ssize_t
2545cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2546                   unsigned long nr_segs, loff_t pos)
2547{
2548        struct inode *inode = file_inode(iocb->ki_filp);
2549        struct cifsInodeInfo *cinode = CIFS_I(inode);
2550        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2551        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2552                                                iocb->ki_filp->private_data;
2553        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2554        ssize_t written;
2555
2556        if (cinode->clientCanCacheAll) {
2557                if (cap_unix(tcon->ses) &&
2558                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2559                    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2560                        return generic_file_aio_write(iocb, iov, nr_segs, pos);
2561                return cifs_writev(iocb, iov, nr_segs, pos);
2562        }
2563        /*
2564         * For non-oplocked files in strict cache mode we need to write the
2565         * data exactly from pos to pos+len-1 rather than flush all affected
2566         * pages, because flushing may cause an error with mandatory locks on
2567         * these pages but not on the region from pos to pos+len-1.
2568         */
2569        written = cifs_user_writev(iocb, iov, nr_segs, pos);
2570        if (written > 0 && cinode->clientCanCacheRead) {
2571                /*
2572                 * A Windows 7 server can delay breaking a level2 oplock when
2573                 * a write request comes in - break it on the client to
2574                 * prevent reading stale data.
2575                 */
2576                cifs_invalidate_mapping(inode);
2577                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2578                         inode);
2579                cinode->clientCanCacheRead = false;
2580        }
2581        return written;
2582}
2583
2584static struct cifs_readdata *
2585cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2586{
2587        struct cifs_readdata *rdata;
2588
2589        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2590                        GFP_KERNEL);
2591        if (rdata != NULL) {
2592                kref_init(&rdata->refcount);
2593                INIT_LIST_HEAD(&rdata->list);
2594                init_completion(&rdata->done);
2595                INIT_WORK(&rdata->work, complete);
2596        }
2597
2598        return rdata;
2599}
2600
2601void
2602cifs_readdata_release(struct kref *refcount)
2603{
2604        struct cifs_readdata *rdata = container_of(refcount,
2605                                        struct cifs_readdata, refcount);
2606
2607        if (rdata->cfile)
2608                cifsFileInfo_put(rdata->cfile);
2609
2610        kfree(rdata);
2611}
2612
2613static int
2614cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2615{
2616        int rc = 0;
2617        struct page *page;
2618        unsigned int i;
2619
2620        for (i = 0; i < nr_pages; i++) {
2621                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2622                if (!page) {
2623                        rc = -ENOMEM;
2624                        break;
2625                }
2626                rdata->pages[i] = page;
2627        }
2628
2629        if (rc) {
2630                for (i = 0; i < nr_pages; i++) {
2631                        put_page(rdata->pages[i]);
2632                        rdata->pages[i] = NULL;
2633                }
2634        }
2635        return rc;
2636}
2637
2638static void
2639cifs_uncached_readdata_release(struct kref *refcount)
2640{
2641        struct cifs_readdata *rdata = container_of(refcount,
2642                                        struct cifs_readdata, refcount);
2643        unsigned int i;
2644
2645        for (i = 0; i < rdata->nr_pages; i++) {
2646                put_page(rdata->pages[i]);
2647                rdata->pages[i] = NULL;
2648        }
2649        cifs_readdata_release(refcount);
2650}
2651
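    /* attempt to send the read to the server, retry on any -EAGAIN errors */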
2652static int
2653cifs_retry_async_readv(struct cifs_readdata *rdata)
2654{
2655        int rc;
2656        struct TCP_Server_Info *server;
2657
2658        server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2659
2660        do {
2661                if (rdata->cfile->invalidHandle) {
2662                        rc = cifs_reopen_file(rdata->cfile, true);
2663                        if (rc != 0)
2664                                continue;
2665                }
2666                rc = server->ops->async_readv(rdata);
2667        } while (rc == -EAGAIN);
2668
2669        return rc;
2670}
2671
2672/**
2673 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2674 * @rdata:      the readdata response with list of pages holding data
2675 * @iov:        vector in which we should copy the data
2676 * @nr_segs:    number of segments in vector
2677 * @offset:     offset into file of the first iovec
2678 * @copied:     used to return the amount of data copied to the iov
2679 *
2680 * This function copies data from a list of pages in a readdata response into
2681 * an array of iovecs. It will first calculate where the data should go
2682 * based on the info in the readdata and then copy the data into that spot.
2683 */
2684static ssize_t
2685cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2686                        unsigned long nr_segs, loff_t offset, ssize_t *copied)
2687{
2688        int rc = 0;
2689        struct iov_iter ii;
2690        size_t pos = rdata->offset - offset;
2691        ssize_t remaining = rdata->bytes;
2692        unsigned char *pdata;
2693        unsigned int i;
2694
2695        /* set up iov_iter and advance to the correct offset */
2696        iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2697        iov_iter_advance(&ii, pos);
2698
2699        *copied = 0;
2700        for (i = 0; i < rdata->nr_pages; i++) {
2701                ssize_t copy;
2702                struct page *page = rdata->pages[i];
2703
2704                /* copy a whole page or whatever's left */
2705                copy = min_t(ssize_t, remaining, PAGE_SIZE);
2706
2707                /* ...but limit it to whatever space is left in the iov */
2708                copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2709
2710                /* go while there's data to be copied and no errors */
2711                if (copy && !rc) {
2712                        pdata = kmap(page);
2713                        rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2714                                                (int)copy);
2715                        kunmap(page);
2716                        if (!rc) {
2717                                *copied += copy;
2718                                remaining -= copy;
2719                                iov_iter_advance(&ii, copy);
2720                        }
2721                }
2722        }
2723
2724        return rc;
2725}
2726
2727static void
2728cifs_uncached_readv_complete(struct work_struct *work)
2729{
2730        struct cifs_readdata *rdata = container_of(work,
2731                                                struct cifs_readdata, work);
2732
2733        complete(&rdata->done);
2734        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2735}
2736
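    /*
     * Receive len bytes from the socket into rdata's pages, zero-filling
     * the tail of a partial page and releasing any pages that are not
     * needed for this response.
     */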
2737static int
2738cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2739                        struct cifs_readdata *rdata, unsigned int len)
2740{
2741        int total_read = 0, result = 0;
2742        unsigned int i;
2743        unsigned int nr_pages = rdata->nr_pages;
2744        struct kvec iov;
2745
2746        rdata->tailsz = PAGE_SIZE;
2747        for (i = 0; i < nr_pages; i++) {
2748                struct page *page = rdata->pages[i];
2749
2750                if (len >= PAGE_SIZE) {
2751                        /* enough data to fill the page */
2752                        iov.iov_base = kmap(page);
2753                        iov.iov_len = PAGE_SIZE;
2754                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2755                                 i, iov.iov_base, iov.iov_len);
2756                        len -= PAGE_SIZE;
2757                } else if (len > 0) {
2758                        /* enough for partial page, fill and zero the rest */
2759                        iov.iov_base = kmap(page);
2760                        iov.iov_len = len;
2761                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2762                                 i, iov.iov_base, iov.iov_len);
2763                        memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2764                        rdata->tailsz = len;
2765                        len = 0;
2766                } else {
2767                        /* no need to hold page hostage */
2768                        rdata->pages[i] = NULL;
2769                        rdata->nr_pages--;
2770                        put_page(page);
2771                        continue;
2772                }
2773
2774                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2775                kunmap(page);
2776                if (result < 0)
2777                        break;
2778
2779                total_read += result;
2780        }
2781
2782        return total_read > 0 ? total_read : result;
2783}
2784
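    /*
     * Uncached read path: issue asynchronous read requests of up to rsize
     * bytes each, wait for the replies in order of increasing offset and
     * copy the returned data into the caller's iovec.
     */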
2785static ssize_t
2786cifs_iovec_read(struct file *file, const struct iovec *iov,
2787                 unsigned long nr_segs, loff_t *poffset)
2788{
2789        ssize_t rc;
2790        size_t len, cur_len;
2791        ssize_t total_read = 0;
2792        loff_t offset = *poffset;
2793        unsigned int npages;
2794        struct cifs_sb_info *cifs_sb;
2795        struct cifs_tcon *tcon;
2796        struct cifsFileInfo *open_file;
2797        struct cifs_readdata *rdata, *tmp;
2798        struct list_head rdata_list;
2799        pid_t pid;
2800
2801        if (!nr_segs)
2802                return 0;
2803
2804        len = iov_length(iov, nr_segs);
2805        if (!len)
2806                return 0;
2807
2808        INIT_LIST_HEAD(&rdata_list);
2809        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2810        open_file = file->private_data;
2811        tcon = tlink_tcon(open_file->tlink);
2812
2813        if (!tcon->ses->server->ops->async_readv)
2814                return -ENOSYS;
2815
2816        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2817                pid = open_file->pid;
2818        else
2819                pid = current->tgid;
2820
2821        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2822                cifs_dbg(FYI, "attempting read on write only file instance\n");
2823
2824        do {
2825                cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2826                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2827
2828                /* allocate a readdata struct */
2829                rdata = cifs_readdata_alloc(npages,
2830                                            cifs_uncached_readv_complete);
2831                if (!rdata) {
2832                        rc = -ENOMEM;
2833                        goto error;
2834                }
2835
2836                rc = cifs_read_allocate_pages(rdata, npages);
2837                if (rc)
2838                        goto error;
2839
2840                rdata->cfile = cifsFileInfo_get(open_file);
2841                rdata->nr_pages = npages;
2842                rdata->offset = offset;
2843                rdata->bytes = cur_len;
2844                rdata->pid = pid;
2845                rdata->pagesz = PAGE_SIZE;
2846                rdata->read_into_pages = cifs_uncached_read_into_pages;
2847
2848                rc = cifs_retry_async_readv(rdata);
2849error:
2850                if (rc) {
2851                        kref_put(&rdata->refcount,
2852                                 cifs_uncached_readdata_release);
2853                        break;
2854                }
2855
2856                list_add_tail(&rdata->list, &rdata_list);
2857                offset += cur_len;
2858                len -= cur_len;
2859        } while (len > 0);
2860
2861        /* if at least one read request was sent successfully, reset rc */
2862        if (!list_empty(&rdata_list))
2863                rc = 0;
2864
2865        /* the loop below should proceed in the order of increasing offsets */
2866restart_loop:
2867        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2868                if (!rc) {
2869                        ssize_t copied;
2870
2871                        /* FIXME: freezable sleep too? */
2872                        rc = wait_for_completion_killable(&rdata->done);
2873                        if (rc)
2874                                rc = -EINTR;
2875                        else if (rdata->result)
2876                                rc = rdata->result;
2877                        else {
2878                                rc = cifs_readdata_to_iov(rdata, iov,
2879                                                        nr_segs, *poffset,
2880                                                        &copied);
2881                                total_read += copied;
2882                        }
2883
2884                        /* resend call if it's a retryable error */
2885                        if (rc == -EAGAIN) {
2886                                rc = cifs_retry_async_readv(rdata);
2887                                goto restart_loop;
2888                        }
2889                }
2890                list_del_init(&rdata->list);
2891                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2892        }
2893
2894        cifs_stats_bytes_read(tcon, total_read);
2895        *poffset += total_read;
2896
2897        /* mask nodata case */
2898        if (rc == -ENODATA)
2899                rc = 0;
2900
2901        return total_read ? total_read : rc;
2902}
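
/*
 * Editorial sketch (illustrative only): cifs_iovec_read() above splits
 * a large uncached read into rsize-sized async requests. Assuming 4 KiB
 * pages and a hypothetical rsize of 61440, a 1 MiB read works out as:
 *
 *	size_t len = 1048576, rsize = 61440, total = 0;
 *
 *	while (total < len) {
 *		size_t cur = min_t(size_t, len - total, rsize);
 *		unsigned int npages = DIV_ROUND_UP(cur, PAGE_SIZE);
 *		total += cur;
 *	}
 *
 * Iterations 1-17 get cur = 61440 (npages = 15); iteration 18 gets the
 * final cur = 4096 (npages = 1), so 18 rdata structures end up on
 * rdata_list and the completion loop copies the results back into the
 * user's iovec in offset order.
 */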
2903
2904ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2905                               unsigned long nr_segs, loff_t pos)
2906{
2907        ssize_t read;
2908
2909        read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2910        if (read > 0)
2911                iocb->ki_pos = pos;
2912
2913        return read;
2914}
2915
2916ssize_t
2917cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2918                  unsigned long nr_segs, loff_t pos)
2919{
2920        struct inode *inode = file_inode(iocb->ki_filp);
2921        struct cifsInodeInfo *cinode = CIFS_I(inode);
2922        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2923        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2924                                                iocb->ki_filp->private_data;
2925        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2926        int rc = -EACCES;
2927
2928        /*
2929         * In strict cache mode we need to read from the server every time
2930         * if we don't have a level II oplock, because the server can delay
2931         * the mtime change and so we can't decide whether to invalidate the
2932         * inode. Reading from the page cache can also fail if there are
2933         * mandatory locks on pages affected by this read but not on the
2934         * region from pos to pos+len-1.
2935         */
2936        if (!cinode->clientCanCacheRead)
2937                return cifs_user_readv(iocb, iov, nr_segs, pos);
2938
2939        if (cap_unix(tcon->ses) &&
2940            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2941            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2942                return generic_file_aio_read(iocb, iov, nr_segs, pos);
2943
2944        /*
2945         * We need to hold the sem to be sure nobody modifies lock list
2946         * with a brlock that prevents reading.
2947         */
2948        down_read(&cinode->lock_sem);
2949        if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2950                                     tcon->ses->server->vals->shared_lock_type,
2951                                     NULL, CIFS_READ_OP))
2952                rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2953        up_read(&cinode->lock_sem);
2954        return rc;
2955}
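
/*
 * Editorial note: cifs_strict_readv() above boils down to this decision
 * table (a summary for the reader, not authoritative documentation):
 *
 *	no level II (read) oplock     -> cifs_user_readv(), i.e. read
 *	                                 from the server, bypassing cache
 *	POSIX byte-range locks usable -> generic_file_aio_read() straight
 *	                                 from the page cache
 *	otherwise                     -> page cache read, but only after
 *	                                 verifying under cinode->lock_sem
 *	                                 that no mandatory brlock conflicts
 */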
2956
2957static ssize_t
2958cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2959{
2960        int rc = -EACCES;
2961        unsigned int bytes_read = 0;
2962        unsigned int total_read;
2963        unsigned int current_read_size;
2964        unsigned int rsize;
2965        struct cifs_sb_info *cifs_sb;
2966        struct cifs_tcon *tcon;
2967        struct TCP_Server_Info *server;
2968        unsigned int xid;
2969        char *cur_offset;
2970        struct cifsFileInfo *open_file;
2971        struct cifs_io_parms io_parms;
2972        int buf_type = CIFS_NO_BUFFER;
2973        __u32 pid;
2974
2975        xid = get_xid();
2976        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2977
2978        /* FIXME: set up handlers for larger reads and/or convert to async */
2979        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2980
2981        if (file->private_data == NULL) {
2982                rc = -EBADF;
2983                free_xid(xid);
2984                return rc;
2985        }
2986        open_file = file->private_data;
2987        tcon = tlink_tcon(open_file->tlink);
2988        server = tcon->ses->server;
2989
2990        if (!server->ops->sync_read) {
2991                free_xid(xid);
2992                return -ENOSYS;
2993        }
2994
2995        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2996                pid = open_file->pid;
2997        else
2998                pid = current->tgid;
2999
3000        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3001                cifs_dbg(FYI, "attempting read on write only file instance\n");
3002
3003        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3004             total_read += bytes_read, cur_offset += bytes_read) {
3005                current_read_size = min_t(uint, read_size - total_read, rsize);
3006                /*
3007                 * For Windows ME and 9x we do not want to request more than
3008                 * the server negotiated, since it will refuse the read then.
3009                 */
3010                if ((tcon->ses) && !(tcon->ses->capabilities &
3011                                tcon->ses->server->vals->cap_large_files)) {
3012                        current_read_size = min_t(uint, current_read_size,
3013                                        CIFSMaxBufSize);
3014                }
3015                rc = -EAGAIN;
3016                while (rc == -EAGAIN) {
3017                        if (open_file->invalidHandle) {
3018                                rc = cifs_reopen_file(open_file, true);
3019                                if (rc != 0)
3020                                        break;
3021                        }
3022                        io_parms.pid = pid;
3023                        io_parms.tcon = tcon;
3024                        io_parms.offset = *offset;
3025                        io_parms.length = current_read_size;
3026                        rc = server->ops->sync_read(xid, open_file, &io_parms,
3027                                                    &bytes_read, &cur_offset,
3028                                                    &buf_type);
3029                }
3030                if (rc || (bytes_read == 0)) {
3031                        if (total_read) {
3032                                break;
3033                        } else {
3034                                free_xid(xid);
3035                                return rc;
3036                        }
3037                } else {
3038                        cifs_stats_bytes_read(tcon, total_read);
3039                        *offset += bytes_read;
3040                }
3041        }
3042        free_xid(xid);
3043        return total_read;
3044}
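
/*
 * Editorial note on the loop above: the inner while (rc == -EAGAIN) is
 * the driver's usual reconnect pattern - if the handle was invalidated
 * by a session reconnect, cifs_reopen_file() restores it and the same
 * sync_read is reissued with identical io_parms. Only a result other
 * than -EAGAIN (data, EOF, or a hard error) leaves the inner loop.
 */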
3045
3046/*
3047 * If the page is mmap'ed into a process' page tables, then we need to make
3048 * sure that it doesn't change while being written back.
3049 */
3050static int
3051cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3052{
3053        struct page *page = vmf->page;
3054
3055        lock_page(page);
3056        return VM_FAULT_LOCKED;
3057}
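
/*
 * Editorial note: returning VM_FAULT_LOCKED tells the fault handler
 * that ->page_mkwrite() left the page locked; the VM will mark it dirty
 * and unlock it. Holding the page lock is enough here because writeback
 * also works on locked pages, so a faulting writer cannot scribble on a
 * page while cifs_writepages() is sending it to the server.
 */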
3058
3059static struct vm_operations_struct cifs_file_vm_ops = {
3060        .fault = filemap_fault,
3061        .page_mkwrite = cifs_page_mkwrite,
3062        .remap_pages = generic_file_remap_pages,
3063};
3064
3065int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3066{
3067        int rc, xid;
3068        struct inode *inode = file_inode(file);
3069
3070        xid = get_xid();
3071
3072        if (!CIFS_I(inode)->clientCanCacheRead) {
3073                rc = cifs_invalidate_mapping(inode);
3074                if (rc) {
3075                        free_xid(xid);  /* avoid leaking the xid on early return */
                        return rc;
                }
3076        }
3077
3078        rc = generic_file_mmap(file, vma);
3079        if (rc == 0)
3080                vma->vm_ops = &cifs_file_vm_ops;
3081        free_xid(xid);
3082        return rc;
3083}
3084
3085int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3086{
3087        int rc, xid;
3088
3089        xid = get_xid();
3090        rc = cifs_revalidate_file(file);
3091        if (rc) {
3092                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3093                         rc);
3094                free_xid(xid);
3095                return rc;
3096        }
3097        rc = generic_file_mmap(file, vma);
3098        if (rc == 0)
3099                vma->vm_ops = &cifs_file_vm_ops;
3100        free_xid(xid);
3101        return rc;
3102}
3103
3104static void
3105cifs_readv_complete(struct work_struct *work)
3106{
3107        unsigned int i;
3108        struct cifs_readdata *rdata = container_of(work,
3109                                                struct cifs_readdata, work);
3110
3111        for (i = 0; i < rdata->nr_pages; i++) {
3112                struct page *page = rdata->pages[i];
3113
3114                lru_cache_add_file(page);
3115
3116                if (rdata->result == 0) {
3117                        flush_dcache_page(page);
3118                        SetPageUptodate(page);
3119                }
3120
3121                unlock_page(page);
3122
3123                if (rdata->result == 0)
3124                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3125
3126                page_cache_release(page);
3127                rdata->pages[i] = NULL;
3128        }
3129        kref_put(&rdata->refcount, cifs_readdata_release);
3130}
3131
3132static int
3133cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3134                        struct cifs_readdata *rdata, unsigned int len)
3135{
3136        int total_read = 0, result = 0;
3137        unsigned int i;
3138        u64 eof;
3139        pgoff_t eof_index;
3140        unsigned int nr_pages = rdata->nr_pages;
3141        struct kvec iov;
3142
3143        /* determine the eof that the server (probably) has */
3144        eof = CIFS_I(rdata->mapping->host)->server_eof;
3145        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3146        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3147
3148        rdata->tailsz = PAGE_CACHE_SIZE;
3149        for (i = 0; i < nr_pages; i++) {
3150                struct page *page = rdata->pages[i];
3151
3152                if (len >= PAGE_CACHE_SIZE) {
3153                        /* enough data to fill the page */
3154                        iov.iov_base = kmap(page);
3155                        iov.iov_len = PAGE_CACHE_SIZE;
3156                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3157                                 i, page->index, iov.iov_base, iov.iov_len);
3158                        len -= PAGE_CACHE_SIZE;
3159                } else if (len > 0) {
3160                        /* enough for partial page, fill and zero the rest */
3161                        iov.iov_base = kmap(page);
3162                        iov.iov_len = len;
3163                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3164                                 i, page->index, iov.iov_base, iov.iov_len);
3165                        memset(iov.iov_base + len,
3166                                '\0', PAGE_CACHE_SIZE - len);
3167                        rdata->tailsz = len;
3168                        len = 0;
3169                } else if (page->index > eof_index) {
3170                        /*
3171                         * The VFS will not try to do readahead past the
3172                         * i_size, but it's possible that we have outstanding
3173                         * writes with gaps in the middle and the i_size hasn't
3174                         * caught up yet. Populate those with zeroed out pages
3175                         * to prevent the VFS from repeatedly attempting to
3176                         * fill them until the writes are flushed.
3177                         */
3178                        zero_user(page, 0, PAGE_CACHE_SIZE);
3179                        lru_cache_add_file(page);
3180                        flush_dcache_page(page);
3181                        SetPageUptodate(page);
3182                        unlock_page(page);
3183                        page_cache_release(page);
3184                        rdata->pages[i] = NULL;
3185                        rdata->nr_pages--;
3186                        continue;
3187                } else {
3188                        /* no need to hold page hostage */
3189                        lru_cache_add_file(page);
3190                        unlock_page(page);
3191                        page_cache_release(page);
3192                        rdata->pages[i] = NULL;
3193                        rdata->nr_pages--;
3194                        continue;
3195                }
3196
3197                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3198                kunmap(page);
3199                if (result < 0)
3200                        break;
3201
3202                total_read += result;
3203        }
3204
3205        return total_read > 0 ? total_read : result;
3206}
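
/*
 * Editorial sketch (hypothetical numbers): assume server_eof = 20480,
 * so eof_index = 4 with 4 KiB pages, and the server returns len = 6000
 * bytes for a four-page readahead rdata starting at index 3:
 *
 *	page[0] (index 3): full 4096-byte read from the socket
 *	page[1] (index 4): 1904-byte tail, remainder of the page zeroed
 *	page[2] (index 5): past eof_index - zero-filled, marked uptodate
 *	                   and dropped from the rdata, no socket read
 *	page[3] (index 6): same treatment as page[2]
 *
 * Had those last indexes been at or below eof_index, the final else
 * branch would simply have released the pages instead.
 */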
3207
3208static int cifs_readpages(struct file *file, struct address_space *mapping,
3209        struct list_head *page_list, unsigned num_pages)
3210{
3211        int rc;
3212        struct list_head tmplist;
3213        struct cifsFileInfo *open_file = file->private_data;
3214        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3215        unsigned int rsize = cifs_sb->rsize;
3216        pid_t pid;
3217
3218        /*
3219         * Give up immediately if rsize is too small to read an entire page.
3220         * The VFS will fall back to readpage. We should never reach this
3221         * point, however, since we set ra_pages to 0 when the rsize is smaller
3222         * than a cache page.
3223         */
3224        if (unlikely(rsize < PAGE_CACHE_SIZE))
3225                return 0;
3226
3227        /*
3228         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3229         * immediately if the cookie is negative.
3230         */
3231        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3232                                         &num_pages);
3233        if (rc == 0)
3234                return rc;
3235
3236        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3237                pid = open_file->pid;
3238        else
3239                pid = current->tgid;
3240
3241        rc = 0;
3242        INIT_LIST_HEAD(&tmplist);
3243
3244        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3245                 __func__, file, mapping, num_pages);
3246
3247        /*
3248         * Start with the page at end of list and move it to private
3249         * list. Do the same with any following pages until we hit
3250         * the rsize limit, hit an index discontinuity, or run out of
3251         * pages. Issue the async read and then start the loop again
3252         * until the list is empty.
3253         *
3254         * Note that list order is important. The page_list is in
3255         * the order of declining indexes. When we put the pages in
3256         * the rdata->pages, then we want them in increasing order.
3257         */
3258        while (!list_empty(page_list)) {
3259                unsigned int i;
3260                unsigned int bytes = PAGE_CACHE_SIZE;
3261                unsigned int expected_index;
3262                unsigned int nr_pages = 1;
3263                loff_t offset;
3264                struct page *page, *tpage;
3265                struct cifs_readdata *rdata;
3266
3267                page = list_entry(page_list->prev, struct page, lru);
3268
3269                /*
3270                 * Lock the page and put it in the cache. Since no one else
3271                 * should have access to this page, we're safe to simply set
3272                 * PG_locked without checking it first.
3273                 */
3274                __set_page_locked(page);
3275                rc = add_to_page_cache_locked(page, mapping,
3276                                              page->index, GFP_KERNEL);
3277
3278                /* give up if we can't stick it in the cache */
3279                if (rc) {
3280                        __clear_page_locked(page);
3281                        break;
3282                }
3283
3284                /* move first page to the tmplist */
3285                offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3286                list_move_tail(&page->lru, &tmplist);
3287
3288                /* now try and add more pages onto the request */
3289                expected_index = page->index + 1;
3290                list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3291                        /* discontinuity? */
3292                        if (page->index != expected_index)
3293                                break;
3294
3295                        /* would this page push the read over the rsize? */
3296                        if (bytes + PAGE_CACHE_SIZE > rsize)
3297                                break;
3298
3299                        __set_page_locked(page);
3300                        if (add_to_page_cache_locked(page, mapping,
3301                                                page->index, GFP_KERNEL)) {
3302                                __clear_page_locked(page);
3303                                break;
3304                        }
3305                        list_move_tail(&page->lru, &tmplist);
3306                        bytes += PAGE_CACHE_SIZE;
3307                        expected_index++;
3308                        nr_pages++;
3309                }
3310
3311                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3312                if (!rdata) {
3313                        /* best to give up if we're out of mem */
3314                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3315                                list_del(&page->lru);
3316                                lru_cache_add_file(page);
3317                                unlock_page(page);
3318                                page_cache_release(page);
3319                        }
3320                        rc = -ENOMEM;
3321                        break;
3322                }
3323
3324                rdata->cfile = cifsFileInfo_get(open_file);
3325                rdata->mapping = mapping;
3326                rdata->offset = offset;
3327                rdata->bytes = bytes;
3328                rdata->pid = pid;
3329                rdata->pagesz = PAGE_CACHE_SIZE;
3330                rdata->read_into_pages = cifs_readpages_read_into_pages;
3331
3332                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3333                        list_del(&page->lru);
3334                        rdata->pages[rdata->nr_pages++] = page;
3335                }
3336
3337                rc = cifs_retry_async_readv(rdata);
3338                if (rc != 0) {
3339                        for (i = 0; i < rdata->nr_pages; i++) {
3340                                page = rdata->pages[i];
3341                                lru_cache_add_file(page);
3342                                unlock_page(page);
3343                                page_cache_release(page);
3344                        }
3345                        kref_put(&rdata->refcount, cifs_readdata_release);
3346                        break;
3347                }
3348
3349                kref_put(&rdata->refcount, cifs_readdata_release);
3350        }
3351
3352        return rc;
3353}
3354
3355static int cifs_readpage_worker(struct file *file, struct page *page,
3356        loff_t *poffset)
3357{
3358        char *read_data;
3359        int rc;
3360
3361        /* Is the page cached? */
3362        rc = cifs_readpage_from_fscache(file_inode(file), page);
3363        if (rc == 0)
3364                goto read_complete;
3365
3366        page_cache_get(page);
3367        read_data = kmap(page);
3368        /* for reads over a certain size we could initiate async read-ahead */
3369
3370        rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3371
3372        if (rc < 0)
3373                goto io_error;
3374        else
3375                cifs_dbg(FYI, "Bytes read %d\n", rc);
3376
3377        file_inode(file)->i_atime =
3378                current_fs_time(file_inode(file)->i_sb);
3379
3380        if (PAGE_CACHE_SIZE > rc)
3381                memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3382
3383        flush_dcache_page(page);
3384        SetPageUptodate(page);
3385
3386        /* send this page to the cache */
3387        cifs_readpage_to_fscache(file_inode(file), page);
3388
3389        rc = 0;
3390
3391io_error:
3392        kunmap(page);
3393        page_cache_release(page);
3394
3395read_complete:
3396        return rc;
3397}
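
/*
 * Editorial sketch (hypothetical numbers): if cifs_read() above returns
 * rc = 1000 for a 4096-byte page, the remaining 3096 bytes are cleared
 * by memset(read_data + 1000, 0, 4096 - 1000) before the page is
 * flushed, marked uptodate and pushed to fscache - a short read still
 * produces a fully initialized page.
 */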
3398
3399static int cifs_readpage(struct file *file, struct page *page)
3400{
3401        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3402        int rc = -EACCES;
3403        unsigned int xid;
3404
3405        xid = get_xid();
3406
3407        if (file->private_data == NULL) {
3408                rc = -EBADF;
3409                free_xid(xid);
3410                return rc;
3411        }
3412
3413        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3414                 page, (int)offset, (int)offset);
3415
3416        rc = cifs_readpage_worker(file, page, &offset);
3417
3418        unlock_page(page);
3419
3420        free_xid(xid);
3421        return rc;
3422}
3423
3424static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3425{
3426        struct cifsFileInfo *open_file;
3427
3428        spin_lock(&cifs_file_list_lock);
3429        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3430                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3431                        spin_unlock(&cifs_file_list_lock);
3432                        return 1;
3433                }
3434        }
3435        spin_unlock(&cifs_file_list_lock);
3436        return 0;
3437}
3438
3439/* We do not want to update the file size from the server for inodes
3440   open for write, to avoid races with writepage extending the file.
3441   In the future we could consider allowing the inode to be refreshed
3442   on increases in the file size alone, but this is tricky to do
3443   without racing with writebehind page caching in the current
3444   Linux kernel design. */
3445bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3446{
3447        if (!cifsInode)
3448                return true;
3449
3450        if (is_inode_writable(cifsInode)) {
3451                /* This inode is open for write at least once */
3452                struct cifs_sb_info *cifs_sb;
3453
3454                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3455                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3456                        /* since there is no page cache to corrupt on
3457                           direct I/O, we can change the size safely */
3458                        return true;
3459                }
3460
3461                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3462                        return true;
3463
3464                return false;
3465        } else
3466                return true;
3467}
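
/*
 * Editorial sketch (hypothetical numbers): with the inode open for
 * write on a cached (non direct-I/O) mount and i_size = 102400, the
 * function above answers:
 *
 *	end_of_file = 204800 -> true  (growing the file is safe)
 *	end_of_file =  51200 -> false (a local writer may simply not have
 *	                              been flushed to the server yet)
 */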
3468
3469static int cifs_write_begin(struct file *file, struct address_space *mapping,
3470                        loff_t pos, unsigned len, unsigned flags,
3471                        struct page **pagep, void **fsdata)
3472{
3473        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3474        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3475        loff_t page_start = pos & PAGE_MASK;
3476        loff_t i_size;
3477        struct page *page;
3478        int rc = 0;
3479
3480        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3481
3482        page = grab_cache_page_write_begin(mapping, index, flags);
3483        if (!page) {
3484                rc = -ENOMEM;
3485                goto out;
3486        }
3487
3488        if (PageUptodate(page))
3489                goto out;
3490
3491        /*
3492         * If we write a full page it will be up to date, no need to read from
3493         * the server. If the write is short, we'll end up doing a sync write
3494         * instead.
3495         */
3496        if (len == PAGE_CACHE_SIZE)
3497                goto out;
3498
3499        /*
3500         * optimize away the read when we have an oplock, and we're not
3501         * expecting to use any of the data we'd be reading in. That
3502         * is, when the page lies beyond the EOF, or straddles the EOF
3503         * and the write will cover all of the existing data.
3504         */
3505        if (CIFS_I(mapping->host)->clientCanCacheRead) {
3506                i_size = i_size_read(mapping->host);
3507                if (page_start >= i_size ||
3508                    (offset == 0 && (pos + len) >= i_size)) {
3509                        zero_user_segments(page, 0, offset,
3510                                           offset + len,
3511                                           PAGE_CACHE_SIZE);
3512                        /*
3513                         * PageChecked means that the parts of the page
3514                         * to which we're not writing are considered up
3515                         * to date. Once the data is copied to the
3516                         * page, it can be set uptodate.
3517                         */
3518                        SetPageChecked(page);
3519                        goto out;
3520                }
3521        }
3522
3523        if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3524                /*
3525                 * might as well read a page, it is fast enough. If we get
3526                 * an error, we don't need to return it. cifs_write_end will
3527                 * do a sync write instead since PG_uptodate isn't set.
3528                 */
3529                cifs_readpage_worker(file, page, &page_start);
3530        } else {
3531                /* we could try using another file handle if there is one,
3532                   but how would we lock it to prevent a close of that
3533                   handle racing with this read? In any case this will be
3534                   written out by write_end, so it is fine */
3535        }
3536out:
3537        *pagep = page;
3538        return rc;
3539}
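
/*
 * Editorial summary of cifs_write_begin() above (illustrative, 4 KiB
 * pages assumed):
 *
 *	page uptodate, or len == PAGE_CACHE_SIZE
 *		-> no read needed; a short copy later falls back to a
 *		   sync write in cifs_write_end()
 *	read oplock held and the page lies at/beyond EOF, or the write
 *	starts at offset 0 and reaches EOF
 *		-> zero the untouched parts and SetPageChecked() instead
 *		   of reading; e.g. pos = 8192, len = 100 on a 5000-byte
 *		   file never fetches the page from the server
 *	otherwise, unless the file is open write-only
 *		-> pre-read the page via cifs_readpage_worker()
 */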
3540
3541static int cifs_release_page(struct page *page, gfp_t gfp)
3542{
3543        if (PagePrivate(page))
3544                return 0;
3545
3546        return cifs_fscache_release_page(page, gfp);
3547}
3548
3549static void cifs_invalidate_page(struct page *page, unsigned long offset)
3550{
3551        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3552
3553        if (offset == 0)
3554                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3555}
3556
3557static int cifs_launder_page(struct page *page)
3558{
3559        int rc = 0;
3560        loff_t range_start = page_offset(page);
3561        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3562        struct writeback_control wbc = {
3563                .sync_mode = WB_SYNC_ALL,
3564                .nr_to_write = 0,
3565                .range_start = range_start,
3566                .range_end = range_end,
3567        };
3568
3569        cifs_dbg(FYI, "Launder page: %p\n", page);
3570
3571        if (clear_page_dirty_for_io(page))
3572                rc = cifs_writepage_locked(page, &wbc);
3573
3574        cifs_fscache_invalidate_page(page, page->mapping->host);
3575        return rc;
3576}
3577
3578void cifs_oplock_break(struct work_struct *work)
3579{
3580        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3581                                                  oplock_break);
3582        struct inode *inode = cfile->dentry->d_inode;
3583        struct cifsInodeInfo *cinode = CIFS_I(inode);
3584        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3585        int rc = 0;
3586
3587        if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3588                                                cifs_has_mand_locks(cinode)) {
3589                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3590                         inode);
3591                cinode->clientCanCacheRead = false;
3592        }
3593
3594        if (inode && S_ISREG(inode->i_mode)) {
3595                if (cinode->clientCanCacheRead)
3596                        break_lease(inode, O_RDONLY);
3597                else
3598                        break_lease(inode, O_WRONLY);
3599                rc = filemap_fdatawrite(inode->i_mapping);
3600                if (cinode->clientCanCacheRead == 0) {
3601                        rc = filemap_fdatawait(inode->i_mapping);
3602                        mapping_set_error(inode->i_mapping, rc);
3603                        cifs_invalidate_mapping(inode);
3604                }
3605                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3606        }
3607
3608        rc = cifs_push_locks(cfile);
3609        if (rc)
3610                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3611
3612        /*
3613         * Releasing a stale oplock after a recent reconnect of the smb
3614         * session, using a now-incorrect file handle, is not a data integrity
3615         * issue, but do not bother sending an oplock release if the session
3616         * is still disconnected - the server has already released the oplock.
3617         */
3618        if (!cfile->oplock_break_cancelled) {
3619                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3620                                                             cinode);
3621                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3622        }
3623}
3624
3625const struct address_space_operations cifs_addr_ops = {
3626        .readpage = cifs_readpage,
3627        .readpages = cifs_readpages,
3628        .writepage = cifs_writepage,
3629        .writepages = cifs_writepages,
3630        .write_begin = cifs_write_begin,
3631        .write_end = cifs_write_end,
3632        .set_page_dirty = __set_page_dirty_nobuffers,
3633        .releasepage = cifs_release_page,
3634        .invalidatepage = cifs_invalidate_page,
3635        .launder_page = cifs_launder_page,
3636};
3637
3638/*
3639 * cifs_readpages requires the server to support a buffer large enough to
3640 * contain the header plus one complete page of data.  Otherwise, we need
3641 * to leave cifs_readpages out of the address space operations.
3642 */
3643const struct address_space_operations cifs_addr_ops_smallbuf = {
3644        .readpage = cifs_readpage,
3645        .writepage = cifs_writepage,
3646        .writepages = cifs_writepages,
3647        .write_begin = cifs_write_begin,
3648        .write_end = cifs_write_end,
3649        .set_page_dirty = __set_page_dirty_nobuffers,
3650        .releasepage = cifs_release_page,
3651        .invalidatepage = cifs_invalidate_page,
3652        .launder_page = cifs_launder_page,
3653};
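
/*
 * Editorial note: the choice between cifs_addr_ops and
 * cifs_addr_ops_smallbuf is made when an inode's address space is set
 * up; roughly (a sketch of cifs_set_ops() in fs/cifs/inode.c):
 *
 *	if (server->maxBuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 */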
3654