/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

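/*
 * Map the POSIX O_ACCMODE bits of the open flags to NT desired-access
 * bits. The trailing return is a defensive fallback for an unrecognized
 * access mode and requests a conservative read/write/attribute set.
 */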
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access-denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

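/*
 * Map POSIX open flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions open call; O_DSYNC is conservatively widened to SMB_O_SYNC.
 */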
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

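/*
 * Translate the O_CREAT/O_EXCL/O_TRUNC combination in the open flags
 * into the matching NT create disposition (see the mapping table in
 * cifs_nt_open() below).
 */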
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

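/*
 * Open a file using the CIFS POSIX extensions. On success the inode is
 * optionally instantiated or refreshed from the FILE_UNIX_BASIC_INFO
 * data returned by the server.
 */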
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

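/*
 * Open a file the traditional (non-POSIX-extensions) way through
 * server->ops->open() and then refresh the inode metadata from the
 * server.
 */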
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because large file support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

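/*
 * Return true if any open instance of this inode holds at least one
 * mandatory byte-range lock; used when deciding whether a read oplock
 * can be kept.
 */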
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

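/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * inode and tcon open-file lists, and apply the oplock granted by the
 * server (downgraded to None if mandatory brlocks are present).
 */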
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if it is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss a lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

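/*
 * ->open() for regular files: try a POSIX-extensions open when the
 * server supports it, fall back to a regular NT open otherwise, then
 * build the cifsFileInfo for this open instance.
 */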
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through and retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we cannot set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte-range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

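/*
 * Reopen a file whose handle was invalidated by a reconnect. If
 * can_flush is set, dirty pages are written out and the inode metadata
 * is refreshed once the handle is reestablished.
 */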
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * We cannot grab the rename sem here, because various ops, including
         * some that already hold the rename sem, can end up causing writepage
         * to get called, and if the server was down that means we end up
         * here. We can never tell whether the caller already holds the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * We cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout has expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are already writing out data to the server and could
         * deadlock if we tried to flush data; and since we do not know
         * whether we have data that would invalidate the current end of
         * file on the server, we cannot go to the server to get the new
         * inode info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

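/*
 * ->release() for directories: close any in-progress readdir handle on
 * the server and free the cached search buffer.
 */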
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

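/*
 * Allocate and initialize a cifsLockInfo for the given byte range; the
 * caller links it into the appropriate lock list.
 */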
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP (0), CIFS_READ_OP (1) or CIFS_WRITE_OP (2) */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to ask the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to ask the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

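/*
 * Push all cached mandatory byte-range locks for this open file to the
 * server, batching as many LOCKING_ANDX_RANGE entries per request as
 * the server's maxBuf allows.
 */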
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

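/*
 * Hash the lock owner pointer into a 32-bit value that stands in for a
 * pid in posix lock requests sent to the server.
 */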
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

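/*
 * Walk the inode's cached POSIX locks and push each one to the server
 * via CIFSSMBPosixLock(); called when we can no longer cache brlocks.
 */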
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we hold cinode->lock_sem, which protects
         * the locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated entry */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

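/*
 * Decode a struct file_lock into the lock type, lock/unlock operation
 * and wait flag expected by the CIFS lock calls, logging any flags that
 * are not handled.
 */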
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

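/*
 * Handle F_GETLK: for posix locks, test locally and fall back to the
 * server; for mandatory locks, probe by trying to set the lock on the
 * server and, if that succeeds, immediately unlocking it again.
 */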
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

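/*
 * Remove the cached locks of this open file that fall inside the unlock
 * range and, unless brlocks can still be cached, send the corresponding
 * unlock requests to the server in batches.
 */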
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1440                        /*
1441                         * We need to save a lock here to let us add it again to
1442                         * the file's list if the unlock range request fails on
1443                         * the server.
1444                         */
1445                        list_move(&li->llist, &tmp_llist);
1446                        if (++num == max_num) {
1447                                stored_rc = cifs_lockv(xid, tcon,
1448                                                       cfile->fid.netfid,
1449                                                       li->type, num, 0, buf);
1450                                if (stored_rc) {
1451                                        /*
1452                                         * We failed on the unlock range
1453                                         * request - add all locks from the tmp
1454                                         * list to the head of the file's list.
1455                                         */
1456                                        cifs_move_llist(&tmp_llist,
1457                                                        &cfile->llist->locks);
1458                                        rc = stored_rc;
1459                                } else
1460                                        /*
1461                         * The unlock range request succeeded -
1462                                         * free the tmp list.
1463                                         */
1464                                        cifs_free_llist(&tmp_llist);
1465                                cur = buf;
1466                                num = 0;
1467                        } else
1468                                cur++;
1469                }
1470                if (num) {
1471                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1472                                               types[i], num, 0, buf);
1473                        if (stored_rc) {
1474                                cifs_move_llist(&tmp_llist,
1475                                                &cfile->llist->locks);
1476                                rc = stored_rc;
1477                        } else
1478                                cifs_free_llist(&tmp_llist);
1479                }
1480        }
1481
1482        up_write(&cinode->lock_sem);
1483        kfree(buf);
1484        return rc;
1485}
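/*
 * Worked example for the batching above (structure sizes here are
 * assumptions for illustration, not taken from the headers): with
 * max_buf = 16384, a ~32-byte SMB header and 20-byte LOCKING_ANDX_RANGE
 * entries, max_num = (16384 - 32) / 20 = 817, i.e. up to 817 unlock
 * ranges can be coalesced into one request per lock type.
 */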
1486
1487static int
1488cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1489           bool wait_flag, bool posix_lck, int lock, int unlock,
1490           unsigned int xid)
1491{
1492        int rc = 0;
1493        __u64 length = 1 + flock->fl_end - flock->fl_start;
1494        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1495        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1496        struct TCP_Server_Info *server = tcon->ses->server;
1497        struct inode *inode = d_inode(cfile->dentry);
1498
1499        if (posix_lck) {
1500                int posix_lock_type;
1501
1502                rc = cifs_posix_lock_set(file, flock);
1503                if (rc <= 0)
1504                        return rc;
1505
1506                if (type & server->vals->shared_lock_type)
1507                        posix_lock_type = CIFS_RDLCK;
1508                else
1509                        posix_lock_type = CIFS_WRLCK;
1510
1511                if (unlock == 1)
1512                        posix_lock_type = CIFS_UNLCK;
1513
1514                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1515                                      hash_lockowner(flock->fl_owner),
1516                                      flock->fl_start, length,
1517                                      NULL, posix_lock_type, wait_flag);
1518                goto out;
1519        }
1520
1521        if (lock) {
1522                struct cifsLockInfo *lock;
1523
1524                lock = cifs_lock_init(flock->fl_start, length, type);
1525                if (!lock)
1526                        return -ENOMEM;
1527
1528                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1529                if (rc < 0) {
1530                        kfree(lock);
1531                        return rc;
1532                }
1533                if (!rc)
1534                        goto out;
1535
1536                /*
1537                 * Windows 7 server can delay breaking lease from read to None
1538                 * if we set a byte-range lock on a file - break it explicitly
1539                 * before sending the lock to the server to be sure the next
1540                 * read won't conflict with non-overlapping locks due to
1541                 * page reading.
1542                 */
1543                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1544                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1545                        cifs_zap_mapping(inode);
1546                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1547                                 inode);
1548                        CIFS_I(inode)->oplock = 0;
1549                }
1550
1551                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1552                                            type, 1, 0, wait_flag);
1553                if (rc) {
1554                        kfree(lock);
1555                        return rc;
1556                }
1557
1558                cifs_lock_add(cfile, lock);
1559        } else if (unlock)
1560                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1561
1562out:
1563        if (flock->fl_flags & FL_POSIX && !rc)
1564                rc = locks_lock_file_wait(file, flock);
1565        return rc;
1566}
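/*
 * Path summary for cifs_setlk() above: with POSIX extensions the request
 * is translated and sent via CIFSSMBPosixLock(). Otherwise
 * cifs_lock_add_if() either caches the lock locally or reports a
 * conflict; only when the server must be consulted is ops->mand_lock()
 * issued, and cifs_lock_add() records the lock after the server accepts
 * it, so a server-side failure leaves no stale local state.
 */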
1567
1568int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1569{
1570        int rc, xid;
1571        int lock = 0, unlock = 0;
1572        bool wait_flag = false;
1573        bool posix_lck = false;
1574        struct cifs_sb_info *cifs_sb;
1575        struct cifs_tcon *tcon;
1576        struct cifsInodeInfo *cinode;
1577        struct cifsFileInfo *cfile;
1578        __u16 netfid;
1579        __u32 type;
1580
1581        rc = -EACCES;
1582        xid = get_xid();
1583
1584        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1585                 cmd, flock->fl_flags, flock->fl_type,
1586                 flock->fl_start, flock->fl_end);
1587
1588        cfile = (struct cifsFileInfo *)file->private_data;
1589        tcon = tlink_tcon(cfile->tlink);
1590
1591        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1592                        tcon->ses->server);
1593
1594        cifs_sb = CIFS_FILE_SB(file);
1595        netfid = cfile->fid.netfid;
1596        cinode = CIFS_I(file_inode(file));
1597
1598        if (cap_unix(tcon->ses) &&
1599            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1600            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1601                posix_lck = true;
1602        /*
1603         * BB add code here to normalize offset and length to account for
1604         * negative length, which we cannot accept over the wire.
1605         */
1606        if (IS_GETLK(cmd)) {
1607                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1608                free_xid(xid);
1609                return rc;
1610        }
1611
1612        if (!lock && !unlock) {
1613                /*
1614                 * if this is neither a lock nor an unlock request then
1615                 * there is nothing to do since we do not know what it is
1616                 */
1617                free_xid(xid);
1618                return -EOPNOTSUPP;
1619        }
1620
1621        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1622                        xid);
1623        free_xid(xid);
1624        return rc;
1625}
1626
1627/*
1628 * update the file size (if needed) after a write. Should be called with
1629 * the inode->i_lock held
1630 */
1631void
1632cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1633                      unsigned int bytes_written)
1634{
1635        loff_t end_of_write = offset + bytes_written;
1636
1637        if (end_of_write > cifsi->server_eof)
1638                cifsi->server_eof = end_of_write;
1639}
1640
1641static ssize_t
1642cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1643           size_t write_size, loff_t *offset)
1644{
1645        int rc = 0;
1646        unsigned int bytes_written = 0;
1647        unsigned int total_written;
1648        struct cifs_sb_info *cifs_sb;
1649        struct cifs_tcon *tcon;
1650        struct TCP_Server_Info *server;
1651        unsigned int xid;
1652        struct dentry *dentry = open_file->dentry;
1653        struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1654        struct cifs_io_parms io_parms;
1655
1656        cifs_sb = CIFS_SB(dentry->d_sb);
1657
1658        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1659                 write_size, *offset, dentry);
1660
1661        tcon = tlink_tcon(open_file->tlink);
1662        server = tcon->ses->server;
1663
1664        if (!server->ops->sync_write)
1665                return -ENOSYS;
1666
1667        xid = get_xid();
1668
1669        for (total_written = 0; write_size > total_written;
1670             total_written += bytes_written) {
1671                rc = -EAGAIN;
1672                while (rc == -EAGAIN) {
1673                        struct kvec iov[2];
1674                        unsigned int len;
1675
1676                        if (open_file->invalidHandle) {
1677                                /* we could deadlock if we called
1678                                   filemap_fdatawait from here so tell
1679                                   reopen_file not to flush data to
1680                                   server now */
1681                                rc = cifs_reopen_file(open_file, false);
1682                                if (rc != 0)
1683                                        break;
1684                        }
1685
1686                        len = min(server->ops->wp_retry_size(d_inode(dentry)),
1687                                  (unsigned int)write_size - total_written);
1688                        /* iov[0] is reserved for smb header */
1689                        iov[1].iov_base = (char *)write_data + total_written;
1690                        iov[1].iov_len = len;
1691                        io_parms.pid = pid;
1692                        io_parms.tcon = tcon;
1693                        io_parms.offset = *offset;
1694                        io_parms.length = len;
1695                        rc = server->ops->sync_write(xid, &open_file->fid,
1696                                        &io_parms, &bytes_written, iov, 1);
1697                }
1698                if (rc || (bytes_written == 0)) {
1699                        if (total_written)
1700                                break;
1701                        else {
1702                                free_xid(xid);
1703                                return rc;
1704                        }
1705                } else {
1706                        spin_lock(&d_inode(dentry)->i_lock);
1707                        cifs_update_eof(cifsi, *offset, bytes_written);
1708                        spin_unlock(&d_inode(dentry)->i_lock);
1709                        *offset += bytes_written;
1710                }
1711        }
1712
1713        cifs_stats_bytes_written(tcon, total_written);
1714
1715        if (total_written > 0) {
1716                spin_lock(&d_inode(dentry)->i_lock);
1717                if (*offset > d_inode(dentry)->i_size)
1718                        i_size_write(d_inode(dentry), *offset);
1719                spin_unlock(&d_inode(dentry)->i_lock);
1720        }
1721        mark_inode_dirty_sync(d_inode(dentry));
1722        free_xid(xid);
1723        return total_written;
1724}
1725
1726struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1727                                        bool fsuid_only)
1728{
1729        struct cifsFileInfo *open_file = NULL;
1730        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1731
1732        /* only filter by fsuid on multiuser mounts */
1733        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1734                fsuid_only = false;
1735
1736        spin_lock(&cifs_file_list_lock);
1737        /* we could simply get the first_list_entry since write-only entries
1738           are always at the end of the list, but since the first entry might
1739           have a close pending, we go through the whole list */
1740        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1741                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1742                        continue;
1743                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1744                        if (!open_file->invalidHandle) {
1745                                /* found a good file */
1746                                /* lock it so it will not be closed on us */
1747                                cifsFileInfo_get_locked(open_file);
1748                                spin_unlock(&cifs_file_list_lock);
1749                                return open_file;
1750                        } /* else might as well continue, and look for
1751                             another, or simply have the caller reopen it
1752                             again rather than trying to fix this handle */
1753                } else /* write only file */
1754                        break; /* write only files are last so must be done */
1755        }
1756        spin_unlock(&cifs_file_list_lock);
1757        return NULL;
1758}
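/*
 * Illustrative caller sketch (not part of the original file; the function
 * name is hypothetical): the reference taken via cifsFileInfo_get_locked()
 * above must be dropped with cifsFileInfo_put() once the handle is no
 * longer needed.
 */
#if 0	/* example only, not compiled */
static void example_read_path(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	open_file = find_readable_file(cifs_inode, false);
	if (open_file) {
		/* ... issue the read using open_file->fid ... */
		cifsFileInfo_put(open_file);
	}
}
#endif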
1759
1760struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1761                                        bool fsuid_only)
1762{
1763        struct cifsFileInfo *open_file, *inv_file = NULL;
1764        struct cifs_sb_info *cifs_sb;
1765        bool any_available = false;
1766        int rc;
1767        unsigned int refind = 0;
1768
1769        /* Having a null inode here (because mapping->host was set to zero by
1770        the VFS or MM) should not happen, but we had reports of an oops (due to
1771        it being zero) during stress test cases, so we need to check for it */
1772
1773        if (cifs_inode == NULL) {
1774                cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1775                dump_stack();
1776                return NULL;
1777        }
1778
1779        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1780
1781        /* only filter by fsuid on multiuser mounts */
1782        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1783                fsuid_only = false;
1784
1785        spin_lock(&cifs_file_list_lock);
1786refind_writable:
1787        if (refind > MAX_REOPEN_ATT) {
1788                spin_unlock(&cifs_file_list_lock);
1789                return NULL;
1790        }
1791        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1792                if (!any_available && open_file->pid != current->tgid)
1793                        continue;
1794                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795                        continue;
1796                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1797                        if (!open_file->invalidHandle) {
1798                                /* found a good writable file */
1799                                cifsFileInfo_get_locked(open_file);
1800                                spin_unlock(&cifs_file_list_lock);
1801                                return open_file;
1802                        } else {
1803                                if (!inv_file)
1804                                        inv_file = open_file;
1805                        }
1806                }
1807        }
1808        /* couldn't find a usable FH with the same pid, try any available */
1809        if (!any_available) {
1810                any_available = true;
1811                goto refind_writable;
1812        }
1813
1814        if (inv_file) {
1815                any_available = false;
1816                cifsFileInfo_get_locked(inv_file);
1817        }
1818
1819        spin_unlock(&cifs_file_list_lock);
1820
1821        if (inv_file) {
1822                rc = cifs_reopen_file(inv_file, false);
1823                if (!rc)
1824                        return inv_file;
1825                else {
1826                        spin_lock(&cifs_file_list_lock);
1827                        list_move_tail(&inv_file->flist,
1828                                        &cifs_inode->openFileList);
1829                        spin_unlock(&cifs_file_list_lock);
1830                        cifsFileInfo_put(inv_file);
1831                        spin_lock(&cifs_file_list_lock);
1832                        ++refind;
1833                        inv_file = NULL;
1834                        goto refind_writable;
1835                }
1836        }
1837
1838        return NULL;
1839}
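/*
 * Example walk-through of the search above: if the only writable handle
 * was opened by another process and has been invalidated, the first pass
 * (current->tgid only) finds nothing, the second pass (any_available)
 * records it in inv_file, and the reopen path then retries it, giving up
 * after MAX_REOPEN_ATT attempts.
 */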
1840
1841static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1842{
1843        struct address_space *mapping = page->mapping;
1844        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1845        char *write_data;
1846        int rc = -EFAULT;
1847        int bytes_written = 0;
1848        struct inode *inode;
1849        struct cifsFileInfo *open_file;
1850
1851        if (!mapping || !mapping->host)
1852                return -EFAULT;
1853
1854        inode = page->mapping->host;
1855
1856        offset += (loff_t)from;
1857        write_data = kmap(page);
1858        write_data += from;
1859
1860        if ((to > PAGE_SIZE) || (from > to)) {
1861                kunmap(page);
1862                return -EIO;
1863        }
1864
1865        /* racing with truncate? */
1866        if (offset > mapping->host->i_size) {
1867                kunmap(page);
1868                return 0; /* don't care */
1869        }
1870
1871        /* check to make sure that we are not extending the file */
1872        if (mapping->host->i_size - offset < (loff_t)to)
1873                to = (unsigned)(mapping->host->i_size - offset);
1874
1875        open_file = find_writable_file(CIFS_I(mapping->host), false);
1876        if (open_file) {
1877                bytes_written = cifs_write(open_file, open_file->pid,
1878                                           write_data, to - from, &offset);
1879                cifsFileInfo_put(open_file);
1880                /* Does mm or vfs already set times? */
1881                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1882                if ((bytes_written > 0) && (offset))
1883                        rc = 0;
1884                else if (bytes_written < 0)
1885                        rc = bytes_written;
1886        } else {
1887                cifs_dbg(FYI, "No writeable filehandles for inode\n");
1888                rc = -EIO;
1889        }
1890
1891        kunmap(page);
1892        return rc;
1893}
1894
1895static struct cifs_writedata *
1896wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1897                          pgoff_t end, pgoff_t *index,
1898                          unsigned int *found_pages)
1899{
1900        unsigned int nr_pages;
1901        struct page **pages;
1902        struct cifs_writedata *wdata;
1903
1904        wdata = cifs_writedata_alloc((unsigned int)tofind,
1905                                     cifs_writev_complete);
1906        if (!wdata)
1907                return NULL;
1908
1909        /*
1910         * find_get_pages_tag seems to return a max of 256 on each
1911         * iteration, so we must call it several times in order to
1912         * fill the array or the wsize is effectively limited to
1913         * 256 * PAGE_SIZE.
1914         */
1915        *found_pages = 0;
1916        pages = wdata->pages;
1917        do {
1918                nr_pages = find_get_pages_tag(mapping, index,
1919                                              PAGECACHE_TAG_DIRTY, tofind,
1920                                              pages);
1921                *found_pages += nr_pages;
1922                tofind -= nr_pages;
1923                pages += nr_pages;
1924        } while (nr_pages && tofind && *index <= end);
1925
1926        return wdata;
1927}
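/*
 * Worked example for the loop above (assuming 4K pages and a 4MiB
 * wsize): tofind starts at 1024 pages, so at roughly 256 pages per
 * find_get_pages_tag() call the loop needs about four iterations to fill
 * wdata->pages for a single write request.
 */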
1928
1929static unsigned int
1930wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1931                    struct address_space *mapping,
1932                    struct writeback_control *wbc,
1933                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1934{
1935        unsigned int nr_pages = 0, i;
1936        struct page *page;
1937
1938        for (i = 0; i < found_pages; i++) {
1939                page = wdata->pages[i];
1940                /*
1941                 * At this point we hold neither mapping->tree_lock nor
1942                 * lock on the page itself: the page may be truncated or
1943                 * invalidated (changing page->mapping to NULL), or even
1944                 * swizzled back from swapper_space to tmpfs file
1945                 * mapping
1946                 */
1947
1948                if (nr_pages == 0)
1949                        lock_page(page);
1950                else if (!trylock_page(page))
1951                        break;
1952
1953                if (unlikely(page->mapping != mapping)) {
1954                        unlock_page(page);
1955                        break;
1956                }
1957
1958                if (!wbc->range_cyclic && page->index > end) {
1959                        *done = true;
1960                        unlock_page(page);
1961                        break;
1962                }
1963
1964                if (*next && (page->index != *next)) {
1965                        /* Not next consecutive page */
1966                        unlock_page(page);
1967                        break;
1968                }
1969
1970                if (wbc->sync_mode != WB_SYNC_NONE)
1971                        wait_on_page_writeback(page);
1972
1973                if (PageWriteback(page) ||
1974                                !clear_page_dirty_for_io(page)) {
1975                        unlock_page(page);
1976                        break;
1977                }
1978
1979                /*
1980                 * This actually clears the dirty bit in the radix tree.
1981                 * See cifs_writepage() for more commentary.
1982                 */
1983                set_page_writeback(page);
1984                if (page_offset(page) >= i_size_read(mapping->host)) {
1985                        *done = true;
1986                        unlock_page(page);
1987                        end_page_writeback(page);
1988                        break;
1989                }
1990
1991                wdata->pages[i] = page;
1992                *next = page->index + 1;
1993                ++nr_pages;
1994        }
1995
1996        /* reset index to refind any pages skipped */
1997        if (nr_pages == 0)
1998                *index = wdata->pages[0]->index + 1;
1999
2000        /* put any pages we aren't going to use */
2001        for (i = nr_pages; i < found_pages; i++) {
2002                put_page(wdata->pages[i]);
2003                wdata->pages[i] = NULL;
2004        }
2005
2006        return nr_pages;
2007}
2008
2009static int
2010wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2011                 struct address_space *mapping, struct writeback_control *wbc)
2012{
2013        int rc = 0;
2014        struct TCP_Server_Info *server;
2015        unsigned int i;
2016
2017        wdata->sync_mode = wbc->sync_mode;
2018        wdata->nr_pages = nr_pages;
2019        wdata->offset = page_offset(wdata->pages[0]);
2020        wdata->pagesz = PAGE_SIZE;
2021        wdata->tailsz = min(i_size_read(mapping->host) -
2022                        page_offset(wdata->pages[nr_pages - 1]),
2023                        (loff_t)PAGE_SIZE);
2024        wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2025
2026        if (wdata->cfile != NULL)
2027                cifsFileInfo_put(wdata->cfile);
2028        wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2029        if (!wdata->cfile) {
2030                cifs_dbg(VFS, "No writable handles for inode\n");
2031                rc = -EBADF;
2032        } else {
2033                wdata->pid = wdata->cfile->pid;
2034                server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2035                rc = server->ops->async_writev(wdata, cifs_writedata_release);
2036        }
2037
2038        for (i = 0; i < nr_pages; ++i)
2039                unlock_page(wdata->pages[i]);
2040
2041        return rc;
2042}
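/*
 * Worked example for the tailsz/bytes math above (assuming 4K pages):
 * with nr_pages = 3, pages[0] at offset 0 and i_size = 10000, the last
 * page starts at 8192, so tailsz = min(10000 - 8192, 4096) = 1808 and
 * bytes = 2 * 4096 + 1808 = 10000, i.e. exactly up to EOF.
 */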
2043
2044static int cifs_writepages(struct address_space *mapping,
2045                           struct writeback_control *wbc)
2046{
2047        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2048        struct TCP_Server_Info *server;
2049        bool done = false, scanned = false, range_whole = false;
2050        pgoff_t end, index;
2051        struct cifs_writedata *wdata;
2052        int rc = 0;
2053
2054        /*
2055         * If wsize is smaller than the page cache size, default to writing
2056         * one page at a time via cifs_writepage
2057         */
2058        if (cifs_sb->wsize < PAGE_SIZE)
2059                return generic_writepages(mapping, wbc);
2060
2061        if (wbc->range_cyclic) {
2062                index = mapping->writeback_index; /* Start from prev offset */
2063                end = -1;
2064        } else {
2065                index = wbc->range_start >> PAGE_SHIFT;
2066                end = wbc->range_end >> PAGE_SHIFT;
2067                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2068                        range_whole = true;
2069                scanned = true;
2070        }
2071        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2072retry:
2073        while (!done && index <= end) {
2074                unsigned int i, nr_pages, found_pages, wsize, credits;
2075                pgoff_t next = 0, tofind, saved_index = index;
2076
2077                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2078                                                   &wsize, &credits);
2079                if (rc)
2080                        break;
2081
2082                tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2083
2084                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2085                                                  &found_pages);
2086                if (!wdata) {
2087                        rc = -ENOMEM;
2088                        add_credits_and_wake_if(server, credits, 0);
2089                        break;
2090                }
2091
2092                if (found_pages == 0) {
2093                        kref_put(&wdata->refcount, cifs_writedata_release);
2094                        add_credits_and_wake_if(server, credits, 0);
2095                        break;
2096                }
2097
2098                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2099                                               end, &index, &next, &done);
2100
2101                /* nothing to write? */
2102                if (nr_pages == 0) {
2103                        kref_put(&wdata->refcount, cifs_writedata_release);
2104                        add_credits_and_wake_if(server, credits, 0);
2105                        continue;
2106                }
2107
2108                wdata->credits = credits;
2109
2110                rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2111
2112                /* send failure -- clean up the mess */
2113                if (rc != 0) {
2114                        add_credits_and_wake_if(server, wdata->credits, 0);
2115                        for (i = 0; i < nr_pages; ++i) {
2116                                if (rc == -EAGAIN)
2117                                        redirty_page_for_writepage(wbc,
2118                                                           wdata->pages[i]);
2119                                else
2120                                        SetPageError(wdata->pages[i]);
2121                                end_page_writeback(wdata->pages[i]);
2122                                put_page(wdata->pages[i]);
2123                        }
2124                        if (rc != -EAGAIN)
2125                                mapping_set_error(mapping, rc);
2126                }
2127                kref_put(&wdata->refcount, cifs_writedata_release);
2128
2129                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2130                        index = saved_index;
2131                        continue;
2132                }
2133
2134                wbc->nr_to_write -= nr_pages;
2135                if (wbc->nr_to_write <= 0)
2136                        done = true;
2137
2138                index = next;
2139        }
2140
2141        if (!scanned && !done) {
2142                /*
2143                 * We hit the last page and there is more work to be done: wrap
2144                 * back to the start of the file
2145                 */
2146                scanned = true;
2147                index = 0;
2148                goto retry;
2149        }
2150
2151        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2152                mapping->writeback_index = index;
2153
2154        return rc;
2155}
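/*
 * Worked example for the batching in the retry loop above (assuming 4K
 * pages): with wsize = 65536 and a large writeback range, tofind is
 * min((65536 / 4096) - 1, end - index) + 1 = 16, so each pass gathers,
 * prepares and sends at most 16 dirty pages per wdata.
 */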
2156
2157static int
2158cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2159{
2160        int rc;
2161        unsigned int xid;
2162
2163        xid = get_xid();
2164/* BB add check for wbc flags */
2165        get_page(page);
2166        if (!PageUptodate(page))
2167                cifs_dbg(FYI, "ppw - page not up to date\n");
2168
2169        /*
2170         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2171         *
2172         * A writepage() implementation always needs to do either this,
2173         * or re-dirty the page with "redirty_page_for_writepage()" in
2174         * the case of a failure.
2175         *
2176         * Just unlocking the page would leave the radix tree tag bits
2177         * out of sync with the actual state of the page.
2178         */
2179        set_page_writeback(page);
2180retry_write:
2181        rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2182        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2183                goto retry_write;
2184        else if (rc == -EAGAIN)
2185                redirty_page_for_writepage(wbc, page);
2186        else if (rc != 0)
2187                SetPageError(page);
2188        else
2189                SetPageUptodate(page);
2190        end_page_writeback(page);
2191        put_page(page);
2192        free_xid(xid);
2193        return rc;
2194}
2195
2196static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2197{
2198        int rc = cifs_writepage_locked(page, wbc);
2199        unlock_page(page);
2200        return rc;
2201}
2202
2203static int cifs_write_end(struct file *file, struct address_space *mapping,
2204                        loff_t pos, unsigned len, unsigned copied,
2205                        struct page *page, void *fsdata)
2206{
2207        int rc;
2208        struct inode *inode = mapping->host;
2209        struct cifsFileInfo *cfile = file->private_data;
2210        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2211        __u32 pid;
2212
2213        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2214                pid = cfile->pid;
2215        else
2216                pid = current->tgid;
2217
2218        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2219                 page, pos, copied);
2220
2221        if (PageChecked(page)) {
2222                if (copied == len)
2223                        SetPageUptodate(page);
2224                ClearPageChecked(page);
2225        } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2226                SetPageUptodate(page);
2227
2228        if (!PageUptodate(page)) {
2229                char *page_data;
2230                unsigned offset = pos & (PAGE_SIZE - 1);
2231                unsigned int xid;
2232
2233                xid = get_xid();
2234                /* this is probably better than directly calling
2235                   partialpage_write since in this function we know the
2236                   file handle, which we might as well leverage */
2237                /* BB check if anything else is missing out of ppw,
2238                   such as updating the last write time */
2239                page_data = kmap(page);
2240                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2241                /* if (rc < 0) should we set writebehind rc? */
2242                kunmap(page);
2243
2244                free_xid(xid);
2245        } else {
2246                rc = copied;
2247                pos += copied;
2248                set_page_dirty(page);
2249        }
2250
2251        if (rc > 0) {
2252                spin_lock(&inode->i_lock);
2253                if (pos > inode->i_size)
2254                        i_size_write(inode, pos);
2255                spin_unlock(&inode->i_lock);
2256        }
2257
2258        unlock_page(page);
2259        put_page(page);
2260
2261        return rc;
2262}
2263
2264int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2265                      int datasync)
2266{
2267        unsigned int xid;
2268        int rc = 0;
2269        struct cifs_tcon *tcon;
2270        struct TCP_Server_Info *server;
2271        struct cifsFileInfo *smbfile = file->private_data;
2272        struct inode *inode = file_inode(file);
2273        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2274
2275        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2276        if (rc)
2277                return rc;
2278        inode_lock(inode);
2279
2280        xid = get_xid();
2281
2282        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2283                 file, datasync);
2284
2285        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2286                rc = cifs_zap_mapping(inode);
2287                if (rc) {
2288                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2289                        rc = 0; /* don't care about it in fsync */
2290                }
2291        }
2292
2293        tcon = tlink_tcon(smbfile->tlink);
2294        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2295                server = tcon->ses->server;
2296                if (server->ops->flush)
2297                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2298                else
2299                        rc = -ENOSYS;
2300        }
2301
2302        free_xid(xid);
2303        inode_unlock(inode);
2304        return rc;
2305}
2306
2307int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2308{
2309        unsigned int xid;
2310        int rc = 0;
2311        struct cifs_tcon *tcon;
2312        struct TCP_Server_Info *server;
2313        struct cifsFileInfo *smbfile = file->private_data;
2314        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2315        struct inode *inode = file->f_mapping->host;
2316
2317        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2318        if (rc)
2319                return rc;
2320        inode_lock(inode);
2321
2322        xid = get_xid();
2323
2324        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2325                 file, datasync);
2326
2327        tcon = tlink_tcon(smbfile->tlink);
2328        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2329                server = tcon->ses->server;
2330                if (server->ops->flush)
2331                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2332                else
2333                        rc = -ENOSYS;
2334        }
2335
2336        free_xid(xid);
2337        inode_unlock(inode);
2338        return rc;
2339}
2340
2341/*
2342 * As the file closes, flush all cached write data for this inode,
2343 * checking for write-behind errors.
2344 */
2345int cifs_flush(struct file *file, fl_owner_t id)
2346{
2347        struct inode *inode = file_inode(file);
2348        int rc = 0;
2349
2350        if (file->f_mode & FMODE_WRITE)
2351                rc = filemap_write_and_wait(inode->i_mapping);
2352
2353        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2354
2355        return rc;
2356}
2357
2358static int
2359cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2360{
2361        int rc = 0;
2362        unsigned long i;
2363
2364        for (i = 0; i < num_pages; i++) {
2365                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2366                if (!pages[i]) {
2367                        /*
2368                         * save number of pages we have already allocated and
2369                         * return with ENOMEM error
2370                         */
2371                        num_pages = i;
2372                        rc = -ENOMEM;
2373                        break;
2374                }
2375        }
2376
2377        if (rc) {
2378                for (i = 0; i < num_pages; i++)
2379                        put_page(pages[i]);
2380        }
2381        return rc;
2382}
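/*
 * Worked example for the cleanup above: if num_pages is 8 and
 * alloc_page() fails at i = 5, num_pages is clamped to 5, pages 0-4 are
 * released, and -ENOMEM is returned with nothing leaked.
 */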
2383
2384static inline
2385size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2386{
2387        size_t num_pages;
2388        size_t clen;
2389
2390        clen = min_t(const size_t, len, wsize);
2391        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2392
2393        if (cur_len)
2394                *cur_len = clen;
2395
2396        return num_pages;
2397}
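/*
 * Worked example (assuming 4K pages): with wsize = 65536 and
 * len = 200000, clen = min(200000, 65536) = 65536, so *cur_len is set to
 * 65536 and the function returns DIV_ROUND_UP(65536, 4096) = 16 pages.
 */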
2398
2399static void
2400cifs_uncached_writedata_release(struct kref *refcount)
2401{
2402        int i;
2403        struct cifs_writedata *wdata = container_of(refcount,
2404                                        struct cifs_writedata, refcount);
2405
2406        for (i = 0; i < wdata->nr_pages; i++)
2407                put_page(wdata->pages[i]);
2408        cifs_writedata_release(refcount);
2409}
2410
2411static void
2412cifs_uncached_writev_complete(struct work_struct *work)
2413{
2414        struct cifs_writedata *wdata = container_of(work,
2415                                        struct cifs_writedata, work);
2416        struct inode *inode = d_inode(wdata->cfile->dentry);
2417        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2418
2419        spin_lock(&inode->i_lock);
2420        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2421        if (cifsi->server_eof > inode->i_size)
2422                i_size_write(inode, cifsi->server_eof);
2423        spin_unlock(&inode->i_lock);
2424
2425        complete(&wdata->done);
2426
2427        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2428}
2429
2430static int
2431wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2432                      size_t *len, unsigned long *num_pages)
2433{
2434        size_t save_len, copied, bytes, cur_len = *len;
2435        unsigned long i, nr_pages = *num_pages;
2436
2437        save_len = cur_len;
2438        for (i = 0; i < nr_pages; i++) {
2439                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2440                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2441                cur_len -= copied;
2442                /*
2443                 * If we didn't copy as much as we expected, then that
2444                 * may mean we trod into an unmapped area. Stop copying
2445                 * at that point. On the next pass through the big
2446                 * loop, we'll likely end up getting a zero-length
2447                 * write and bailing out of it.
2448                 */
2449                if (copied < bytes)
2450                        break;
2451        }
2452        cur_len = save_len - cur_len;
2453        *len = cur_len;
2454
2455        /*
2456         * If we have no data to send, then that probably means that
2457         * the copy above failed altogether. That's most likely because
2458         * the address in the iovec was bogus. Return -EFAULT and let
2459         * the caller free anything we allocated and bail out.
2460         */
2461        if (!cur_len)
2462                return -EFAULT;
2463
2464        /*
2465         * i + 1 now represents the number of pages we actually used in
2466         * the copy phase above.
2467         */
2468        *num_pages = i + 1;
2469        return 0;
2470}
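/*
 * Worked example (assuming 4K pages): with *len = 10000 and
 * *num_pages = 3, a clean run copies 4096 + 4096 + 1808 bytes. If the
 * second copy_page_from_iter() manages only 1000 bytes (say, an unmapped
 * user address), the loop stops there, *len becomes 4096 + 1000 = 5096
 * and *num_pages becomes 2.
 */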
2471
2472static int
2473cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2474                     struct cifsFileInfo *open_file,
2475                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2476{
2477        int rc = 0;
2478        size_t cur_len;
2479        unsigned long nr_pages, num_pages, i;
2480        struct cifs_writedata *wdata;
2481        struct iov_iter saved_from;
2482        loff_t saved_offset = offset;
2483        pid_t pid;
2484        struct TCP_Server_Info *server;
2485
2486        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2487                pid = open_file->pid;
2488        else
2489                pid = current->tgid;
2490
2491        server = tlink_tcon(open_file->tlink)->ses->server;
2492        memcpy(&saved_from, from, sizeof(struct iov_iter));
2493
2494        do {
2495                unsigned int wsize, credits;
2496
2497                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2498                                                   &wsize, &credits);
2499                if (rc)
2500                        break;
2501
2502                nr_pages = get_numpages(wsize, len, &cur_len);
2503                wdata = cifs_writedata_alloc(nr_pages,
2504                                             cifs_uncached_writev_complete);
2505                if (!wdata) {
2506                        rc = -ENOMEM;
2507                        add_credits_and_wake_if(server, credits, 0);
2508                        break;
2509                }
2510
2511                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2512                if (rc) {
2513                        kfree(wdata);
2514                        add_credits_and_wake_if(server, credits, 0);
2515                        break;
2516                }
2517
2518                num_pages = nr_pages;
2519                rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2520                if (rc) {
2521                        for (i = 0; i < nr_pages; i++)
2522                                put_page(wdata->pages[i]);
2523                        kfree(wdata);
2524                        add_credits_and_wake_if(server, credits, 0);
2525                        break;
2526                }
2527
2528                /*
2529                 * Bring nr_pages down to the number of pages we actually used,
2530                 * and free any pages that we didn't use.
2531                 */
2532                for ( ; nr_pages > num_pages; nr_pages--)
2533                        put_page(wdata->pages[nr_pages - 1]);
2534
2535                wdata->sync_mode = WB_SYNC_ALL;
2536                wdata->nr_pages = nr_pages;
2537                wdata->offset = (__u64)offset;
2538                wdata->cfile = cifsFileInfo_get(open_file);
2539                wdata->pid = pid;
2540                wdata->bytes = cur_len;
2541                wdata->pagesz = PAGE_SIZE;
2542                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2543                wdata->credits = credits;
2544
2545                if (!wdata->cfile->invalidHandle ||
2546                    !cifs_reopen_file(wdata->cfile, false))
2547                        rc = server->ops->async_writev(wdata,
2548                                        cifs_uncached_writedata_release);
2549                if (rc) {
2550                        add_credits_and_wake_if(server, wdata->credits, 0);
2551                        kref_put(&wdata->refcount,
2552                                 cifs_uncached_writedata_release);
2553                        if (rc == -EAGAIN) {
2554                                memcpy(from, &saved_from,
2555                                       sizeof(struct iov_iter));
2556                                iov_iter_advance(from, offset - saved_offset);
2557                                continue;
2558                        }
2559                        break;
2560                }
2561
2562                list_add_tail(&wdata->list, wdata_list);
2563                offset += cur_len;
2564                len -= cur_len;
2565        } while (len > 0);
2566
2567        return rc;
2568}
2569
2570ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2571{
2572        struct file *file = iocb->ki_filp;
2573        ssize_t total_written = 0;
2574        struct cifsFileInfo *open_file;
2575        struct cifs_tcon *tcon;
2576        struct cifs_sb_info *cifs_sb;
2577        struct cifs_writedata *wdata, *tmp;
2578        struct list_head wdata_list;
2579        struct iov_iter saved_from;
2580        int rc;
2581
2582        /*
2583         * BB - optimize for the case when signing is disabled: we can drop
2584         * this extra memory-to-memory copy and use iovec buffers to
2585         * construct the write request.
2586         */
2587
2588        rc = generic_write_checks(iocb, from);
2589        if (rc <= 0)
2590                return rc;
2591
2592        INIT_LIST_HEAD(&wdata_list);
2593        cifs_sb = CIFS_FILE_SB(file);
2594        open_file = file->private_data;
2595        tcon = tlink_tcon(open_file->tlink);
2596
2597        if (!tcon->ses->server->ops->async_writev)
2598                return -ENOSYS;
2599
2600        memcpy(&saved_from, from, sizeof(struct iov_iter));
2601
2602        rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
2603                                  open_file, cifs_sb, &wdata_list);
2604
2605        /*
2606         * If at least one write was successfully sent, then discard any rc
2607         * value from the later writes. If the other writes succeed, then
2608         * we'll end up returning whatever was written. If one fails, then
2609         * we'll get a new rc value from that.
2610         */
2611        if (!list_empty(&wdata_list))
2612                rc = 0;
2613
2614        /*
2615         * Wait for and collect replies for any successful sends in order of
2616         * increasing offset. Once an error is hit or we get a fatal signal
2617         * while waiting, then return without waiting for any more replies.
2618         */
2619restart_loop:
2620        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2621                if (!rc) {
2622                        /* FIXME: freezable too? */
2623                        rc = wait_for_completion_killable(&wdata->done);
2624                        if (rc)
2625                                rc = -EINTR;
2626                        else if (wdata->result)
2627                                rc = wdata->result;
2628                        else
2629                                total_written += wdata->bytes;
2630
2631                        /* resend call if it's a retryable error */
2632                        if (rc == -EAGAIN) {
2633                                struct list_head tmp_list;
2634                                struct iov_iter tmp_from;
2635
2636                                INIT_LIST_HEAD(&tmp_list);
2637                                list_del_init(&wdata->list);
2638
2639                                memcpy(&tmp_from, &saved_from,
2640                                       sizeof(struct iov_iter));
2641                                iov_iter_advance(&tmp_from,
2642                                                 wdata->offset - iocb->ki_pos);
2643
2644                                rc = cifs_write_from_iter(wdata->offset,
2645                                                wdata->bytes, &tmp_from,
2646                                                open_file, cifs_sb, &tmp_list);
2647
2648                                list_splice(&tmp_list, &wdata_list);
2649
2650                                kref_put(&wdata->refcount,
2651                                         cifs_uncached_writedata_release);
2652                                goto restart_loop;
2653                        }
2654                }
2655                list_del_init(&wdata->list);
2656                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2657        }
2658
2659        if (unlikely(!total_written))
2660                return rc;
2661
2662        iocb->ki_pos += total_written;
2663        set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
2664        cifs_stats_bytes_written(tcon, total_written);
2665        return total_written;
2666}
2667
2668static ssize_t
2669cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2670{
2671        struct file *file = iocb->ki_filp;
2672        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2673        struct inode *inode = file->f_mapping->host;
2674        struct cifsInodeInfo *cinode = CIFS_I(inode);
2675        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2676        ssize_t rc;
2677
2678        /*
2679         * We need to hold the sem to be sure nobody modifies the lock list
2680         * with a brlock that prevents writing.
2681         */
2682        down_read(&cinode->lock_sem);
2683        inode_lock(inode);
2684
2685        rc = generic_write_checks(iocb, from);
2686        if (rc <= 0)
2687                goto out;
2688
2689        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2690                                     server->vals->exclusive_lock_type, NULL,
2691                                     CIFS_WRITE_OP))
2692                rc = __generic_file_write_iter(iocb, from);
2693        else
2694                rc = -EACCES;
2695out:
2696        inode_unlock(inode);
2697
2698        if (rc > 0)
2699                rc = generic_write_sync(iocb, rc);
2700        up_read(&cinode->lock_sem);
2701        return rc;
2702}
2703
2704ssize_t
2705cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2706{
2707        struct inode *inode = file_inode(iocb->ki_filp);
2708        struct cifsInodeInfo *cinode = CIFS_I(inode);
2709        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2710        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2711                                                iocb->ki_filp->private_data;
2712        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2713        ssize_t written;
2714
2715        written = cifs_get_writer(cinode);
2716        if (written)
2717                return written;
2718
2719        if (CIFS_CACHE_WRITE(cinode)) {
2720                if (cap_unix(tcon->ses) &&
2721                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2722                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2723                        written = generic_file_write_iter(iocb, from);
2724                        goto out;
2725                }
2726                written = cifs_writev(iocb, from);
2727                goto out;
2728        }
2729        /*
2730         * For non-oplocked files in strict cache mode we need to write the data
2731         * to the server exactly from the pos to pos+len-1 rather than flush all
2732         * affected pages because it may cause an error with mandatory locks on
2733         * these pages but not on the region from pos to pos+len-1.
2734         */
2735        written = cifs_user_writev(iocb, from);
2736        if (written > 0 && CIFS_CACHE_READ(cinode)) {
2737                /*
2738                 * Windows 7 server can delay breaking level2 oplock if a write
2739                 * request comes - break it on the client to prevent reading
2740         * stale data.
2741                 */
2742                cifs_zap_mapping(inode);
2743                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2744                         inode);
2745                cinode->oplock = 0;
2746        }
2747out:
2748        cifs_put_writer(cinode);
2749        return written;
2750}
2751
2752static struct cifs_readdata *
2753cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2754{
2755        struct cifs_readdata *rdata;
2756
2757        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2758                        GFP_KERNEL);
2759        if (rdata != NULL) {
2760                kref_init(&rdata->refcount);
2761                INIT_LIST_HEAD(&rdata->list);
2762                init_completion(&rdata->done);
2763                INIT_WORK(&rdata->work, complete);
2764        }
2765
2766        return rdata;
2767}
2768
2769void
2770cifs_readdata_release(struct kref *refcount)
2771{
2772        struct cifs_readdata *rdata = container_of(refcount,
2773                                        struct cifs_readdata, refcount);
2774
2775        if (rdata->cfile)
2776                cifsFileInfo_put(rdata->cfile);
2777
2778        kfree(rdata);
2779}
2780
2781static int
2782cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2783{
2784        int rc = 0;
2785        struct page *page;
2786        unsigned int i;
2787
2788        for (i = 0; i < nr_pages; i++) {
2789                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2790                if (!page) {
2791                        rc = -ENOMEM;
2792                        break;
2793                }
2794                rdata->pages[i] = page;
2795        }
2796
2797        if (rc) {
2798                while (i--) {   /* put only the pages allocated before the failure */
2799                        put_page(rdata->pages[i]);
2800                        rdata->pages[i] = NULL;
2801                }
2802        }
2803        return rc;
2804}
2805
2806static void
2807cifs_uncached_readdata_release(struct kref *refcount)
2808{
2809        struct cifs_readdata *rdata = container_of(refcount,
2810                                        struct cifs_readdata, refcount);
2811        unsigned int i;
2812
2813        for (i = 0; i < rdata->nr_pages; i++) {
2814                put_page(rdata->pages[i]);
2815                rdata->pages[i] = NULL;
2816        }
2817        cifs_readdata_release(refcount);
2818}
2819
2820/**
2821 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2822 * @rdata:      the readdata response with list of pages holding data
2823 * @iter:       destination for our data
2824 *
2825 * This function copies data from a list of pages in a readdata response into
2826 * an array of iovecs. It will first calculate where the data should go
2827 * based on the info in the readdata and then copy the data into that spot.
2828 */
2829static int
2830cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2831{
2832        size_t remaining = rdata->got_bytes;
2833        unsigned int i;
2834
2835        for (i = 0; i < rdata->nr_pages; i++) {
2836                struct page *page = rdata->pages[i];
2837                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2838                size_t written = copy_page_to_iter(page, 0, copy, iter);
2839                remaining -= written;
2840                if (written < copy && iov_iter_count(iter) > 0)
2841                        break;
2842        }
2843        return remaining ? -EFAULT : 0;
2844}
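/*
 * Worked example (assuming 4K pages): with got_bytes = 10000 spread over
 * three pages, the loop copies 4096, 4096 and 1808 bytes into the iovec;
 * a short copy with space still left in @iter indicates a fault, hence
 * the -EFAULT return.
 */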
2845
2846static void
2847cifs_uncached_readv_complete(struct work_struct *work)
2848{
2849        struct cifs_readdata *rdata = container_of(work,
2850                                                struct cifs_readdata, work);
2851
2852        complete(&rdata->done);
2853        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2854}
2855
2856static int
2857cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2858                        struct cifs_readdata *rdata, unsigned int len)
2859{
2860        int result = 0;
2861        unsigned int i;
2862        unsigned int nr_pages = rdata->nr_pages;
2863
2864        rdata->got_bytes = 0;
2865        rdata->tailsz = PAGE_SIZE;
2866        for (i = 0; i < nr_pages; i++) {
2867                struct page *page = rdata->pages[i];
2868                size_t n;
2869
                if (len == 0) {
2871                        /* no need to hold page hostage */
2872                        rdata->pages[i] = NULL;
2873                        rdata->nr_pages--;
2874                        put_page(page);
2875                        continue;
2876                }
2877                n = len;
2878                if (len >= PAGE_SIZE) {
2879                        /* enough data to fill the page */
2880                        n = PAGE_SIZE;
2881                        len -= n;
2882                } else {
2883                        zero_user(page, len, PAGE_SIZE - len);
2884                        rdata->tailsz = len;
2885                        len = 0;
2886                }
2887                result = cifs_read_page_from_socket(server, page, n);
2888                if (result < 0)
2889                        break;
2890
2891                rdata->got_bytes += result;
2892        }
2893
2894        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2895                                                rdata->got_bytes : result;
2896}
2897
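/*
 * Split an uncached read into rsize-sized pieces and issue each one as
 * an async read, queueing the resulting readdata on @rdata_list. The
 * shape of one iteration, simplified:
 *
 *	server->ops->wait_mtu_credits(server, cifs_sb->rsize, &rsize, &credits);
 *	rdata = cifs_readdata_alloc(npages, cifs_uncached_readv_complete);
 *	cifs_read_allocate_pages(rdata, npages);
 *	rc = server->ops->async_readv(rdata);
 *	if (rc)
 *		add_credits_and_wake_if(server, rdata->credits, 0);
 *
 * i.e. credits reserved for a piece must be returned on any failure
 * before the readdata reference is dropped.
 */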
2898static int
2899cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2900                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2901{
2902        struct cifs_readdata *rdata;
2903        unsigned int npages, rsize, credits;
2904        size_t cur_len;
2905        int rc;
2906        pid_t pid;
2907        struct TCP_Server_Info *server;
2908
2909        server = tlink_tcon(open_file->tlink)->ses->server;
2910
2911        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2912                pid = open_file->pid;
2913        else
2914                pid = current->tgid;
2915
2916        do {
2917                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2918                                                   &rsize, &credits);
2919                if (rc)
2920                        break;
2921
2922                cur_len = min_t(const size_t, len, rsize);
2923                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2924
2925                /* allocate a readdata struct */
2926                rdata = cifs_readdata_alloc(npages,
2927                                            cifs_uncached_readv_complete);
2928                if (!rdata) {
2929                        add_credits_and_wake_if(server, credits, 0);
2930                        rc = -ENOMEM;
2931                        break;
2932                }
2933
2934                rc = cifs_read_allocate_pages(rdata, npages);
2935                if (rc)
2936                        goto error;
2937
2938                rdata->cfile = cifsFileInfo_get(open_file);
2939                rdata->nr_pages = npages;
2940                rdata->offset = offset;
2941                rdata->bytes = cur_len;
2942                rdata->pid = pid;
2943                rdata->pagesz = PAGE_SIZE;
2944                rdata->read_into_pages = cifs_uncached_read_into_pages;
2945                rdata->credits = credits;
2946
2947                if (!rdata->cfile->invalidHandle ||
2948                    !cifs_reopen_file(rdata->cfile, true))
2949                        rc = server->ops->async_readv(rdata);
2950error:
2951                if (rc) {
2952                        add_credits_and_wake_if(server, rdata->credits, 0);
2953                        kref_put(&rdata->refcount,
2954                                 cifs_uncached_readdata_release);
2955                        if (rc == -EAGAIN)
2956                                continue;
2957                        break;
2958                }
2959
2960                list_add_tail(&rdata->list, rdata_list);
2961                offset += cur_len;
2962                len -= cur_len;
2963        } while (len > 0);
2964
2965        return rc;
2966}
2967
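/*
 * Entry point for uncached reads (cache=none mounts and O_DIRECT).
 * Splits the request with cifs_send_async_read(), then waits for each
 * piece in order of increasing offset, copying the received data into
 * the user iterator and resending any piece whose result was -EAGAIN
 * after a reconnect.
 */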
2968ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2969{
2970        struct file *file = iocb->ki_filp;
2971        ssize_t rc;
2972        size_t len;
2973        ssize_t total_read = 0;
2974        loff_t offset = iocb->ki_pos;
2975        struct cifs_sb_info *cifs_sb;
2976        struct cifs_tcon *tcon;
2977        struct cifsFileInfo *open_file;
2978        struct cifs_readdata *rdata, *tmp;
2979        struct list_head rdata_list;
2980
2981        len = iov_iter_count(to);
2982        if (!len)
2983                return 0;
2984
2985        INIT_LIST_HEAD(&rdata_list);
2986        cifs_sb = CIFS_FILE_SB(file);
2987        open_file = file->private_data;
2988        tcon = tlink_tcon(open_file->tlink);
2989
2990        if (!tcon->ses->server->ops->async_readv)
2991                return -ENOSYS;
2992
2993        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2994                cifs_dbg(FYI, "attempting read on write only file instance\n");
2995
2996        rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
2997
        /* if at least one read request was sent successfully, reset rc */
2999        if (!list_empty(&rdata_list))
3000                rc = 0;
3001
3002        len = iov_iter_count(to);
3003        /* the loop below should proceed in the order of increasing offsets */
3004again:
3005        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3006                if (!rc) {
3007                        /* FIXME: freezable sleep too? */
3008                        rc = wait_for_completion_killable(&rdata->done);
3009                        if (rc)
3010                                rc = -EINTR;
3011                        else if (rdata->result == -EAGAIN) {
3012                                /* resend call if it's a retryable error */
3013                                struct list_head tmp_list;
3014                                unsigned int got_bytes = rdata->got_bytes;
3015
3016                                list_del_init(&rdata->list);
3017                                INIT_LIST_HEAD(&tmp_list);
3018
3019                                /*
3020                                 * Got a part of data and then reconnect has
3021                                 * happened -- fill the buffer and continue
3022                                 * reading.
3023                                 */
3024                                if (got_bytes && got_bytes < rdata->bytes) {
3025                                        rc = cifs_readdata_to_iov(rdata, to);
3026                                        if (rc) {
3027                                                kref_put(&rdata->refcount,
3028                                                cifs_uncached_readdata_release);
3029                                                continue;
3030                                        }
3031                                }
3032
3033                                rc = cifs_send_async_read(
3034                                                rdata->offset + got_bytes,
3035                                                rdata->bytes - got_bytes,
3036                                                rdata->cfile, cifs_sb,
3037                                                &tmp_list);
3038
3039                                list_splice(&tmp_list, &rdata_list);
3040
3041                                kref_put(&rdata->refcount,
3042                                         cifs_uncached_readdata_release);
3043                                goto again;
3044                        } else if (rdata->result)
3045                                rc = rdata->result;
3046                        else
3047                                rc = cifs_readdata_to_iov(rdata, to);
3048
3049                        /* if there was a short read -- discard anything left */
3050                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3051                                rc = -ENODATA;
3052                }
3053                list_del_init(&rdata->list);
3054                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3055        }
3056
3057        total_read = len - iov_iter_count(to);
3058
3059        cifs_stats_bytes_read(tcon, total_read);
3060
3061        /* mask nodata case */
3062        if (rc == -ENODATA)
3063                rc = 0;
3064
3065        if (total_read) {
3066                iocb->ki_pos += total_read;
3067                return total_read;
3068        }
3069        return rc;
3070}
3071
3072ssize_t
3073cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3074{
3075        struct inode *inode = file_inode(iocb->ki_filp);
3076        struct cifsInodeInfo *cinode = CIFS_I(inode);
3077        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3078        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3079                                                iocb->ki_filp->private_data;
3080        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3081        int rc = -EACCES;
3082
        /*
         * In strict cache mode we need to read from the server all the time
         * if we don't have a level II oplock because the server can delay
         * mtime changes - so we can't make a decision about invalidating
         * the inode. We can also fail during page reading if there are
         * mandatory byte-range locks on pages affected by this read but
         * not on the region from pos to pos+len-1.
         */
3091        if (!CIFS_CACHE_READ(cinode))
3092                return cifs_user_readv(iocb, to);
3093
3094        if (cap_unix(tcon->ses) &&
3095            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3096            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3097                return generic_file_read_iter(iocb, to);
3098
3099        /*
3100         * We need to hold the sem to be sure nobody modifies lock list
3101         * with a brlock that prevents reading.
3102         */
3103        down_read(&cinode->lock_sem);
3104        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3105                                     tcon->ses->server->vals->shared_lock_type,
3106                                     NULL, CIFS_READ_OP))
3107                rc = generic_file_read_iter(iocb, to);
3108        up_read(&cinode->lock_sem);
3109        return rc;
3110}
3111
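/*
 * Synchronous read path, used by cifs_readpage_worker(). Reads up to
 * @read_size bytes at *@offset into @read_data in rsize-limited chunks,
 * reopening an invalidated handle and retrying on -EAGAIN, and advances
 * *@offset by the number of bytes actually read.
 */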
3112static ssize_t
3113cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3114{
3115        int rc = -EACCES;
3116        unsigned int bytes_read = 0;
3117        unsigned int total_read;
3118        unsigned int current_read_size;
3119        unsigned int rsize;
3120        struct cifs_sb_info *cifs_sb;
3121        struct cifs_tcon *tcon;
3122        struct TCP_Server_Info *server;
3123        unsigned int xid;
3124        char *cur_offset;
3125        struct cifsFileInfo *open_file;
3126        struct cifs_io_parms io_parms;
3127        int buf_type = CIFS_NO_BUFFER;
3128        __u32 pid;
3129
3130        xid = get_xid();
3131        cifs_sb = CIFS_FILE_SB(file);
3132
3133        /* FIXME: set up handlers for larger reads and/or convert to async */
3134        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3135
3136        if (file->private_data == NULL) {
3137                rc = -EBADF;
3138                free_xid(xid);
3139                return rc;
3140        }
3141        open_file = file->private_data;
3142        tcon = tlink_tcon(open_file->tlink);
3143        server = tcon->ses->server;
3144
3145        if (!server->ops->sync_read) {
3146                free_xid(xid);
3147                return -ENOSYS;
3148        }
3149
3150        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3151                pid = open_file->pid;
3152        else
3153                pid = current->tgid;
3154
3155        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3156                cifs_dbg(FYI, "attempting read on write only file instance\n");
3157
3158        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3159             total_read += bytes_read, cur_offset += bytes_read) {
3160                do {
3161                        current_read_size = min_t(uint, read_size - total_read,
3162                                                  rsize);
                        /*
                         * For Windows ME and 9x we do not want to request
                         * more than the negotiated buffer size, since the
                         * server will refuse the read otherwise.
                         */
3168                        if ((tcon->ses) && !(tcon->ses->capabilities &
3169                                tcon->ses->server->vals->cap_large_files)) {
3170                                current_read_size = min_t(uint,
3171                                        current_read_size, CIFSMaxBufSize);
3172                        }
3173                        if (open_file->invalidHandle) {
3174                                rc = cifs_reopen_file(open_file, true);
3175                                if (rc != 0)
3176                                        break;
3177                        }
3178                        io_parms.pid = pid;
3179                        io_parms.tcon = tcon;
3180                        io_parms.offset = *offset;
3181                        io_parms.length = current_read_size;
3182                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3183                                                    &bytes_read, &cur_offset,
3184                                                    &buf_type);
3185                } while (rc == -EAGAIN);
3186
3187                if (rc || (bytes_read == 0)) {
3188                        if (total_read) {
3189                                break;
3190                        } else {
3191                                free_xid(xid);
3192                                return rc;
3193                        }
3194                } else {
3195                        cifs_stats_bytes_read(tcon, total_read);
3196                        *offset += bytes_read;
3197                }
3198        }
3199        free_xid(xid);
3200        return total_read;
3201}
3202
3203/*
3204 * If the page is mmap'ed into a process' page tables, then we need to make
3205 * sure that it doesn't change while being written back.
3206 */
3207static int
3208cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3209{
3210        struct page *page = vmf->page;
3211
3212        lock_page(page);
3213        return VM_FAULT_LOCKED;
3214}
3215
3216static const struct vm_operations_struct cifs_file_vm_ops = {
3217        .fault = filemap_fault,
3218        .map_pages = filemap_map_pages,
3219        .page_mkwrite = cifs_page_mkwrite,
3220};
3221
3222int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3223{
3224        int rc, xid;
3225        struct inode *inode = file_inode(file);
3226
3227        xid = get_xid();
3228
        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_zap_mapping(inode);
                if (rc) {
                        /* don't leak the xid on this early return */
                        free_xid(xid);
                        return rc;
                }
        }
3234
3235        rc = generic_file_mmap(file, vma);
3236        if (rc == 0)
3237                vma->vm_ops = &cifs_file_vm_ops;
3238        free_xid(xid);
3239        return rc;
3240}
3241
3242int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3243{
3244        int rc, xid;
3245
3246        xid = get_xid();
3247        rc = cifs_revalidate_file(file);
3248        if (rc) {
3249                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3250                         rc);
3251                free_xid(xid);
3252                return rc;
3253        }
3254        rc = generic_file_mmap(file, vma);
3255        if (rc == 0)
3256                vma->vm_ops = &cifs_file_vm_ops;
3257        free_xid(xid);
3258        return rc;
3259}
3260
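/*
 * Completion work for a readpages request: mark each page uptodate if
 * the read (or a partially successful retry) returned data for it, hand
 * such pages to fscache, add every page back to the LRU, and drop the
 * readdata reference.
 */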
3261static void
3262cifs_readv_complete(struct work_struct *work)
3263{
3264        unsigned int i, got_bytes;
3265        struct cifs_readdata *rdata = container_of(work,
3266                                                struct cifs_readdata, work);
3267
3268        got_bytes = rdata->got_bytes;
3269        for (i = 0; i < rdata->nr_pages; i++) {
3270                struct page *page = rdata->pages[i];
3271
3272                lru_cache_add_file(page);
3273
3274                if (rdata->result == 0 ||
3275                    (rdata->result == -EAGAIN && got_bytes)) {
3276                        flush_dcache_page(page);
3277                        SetPageUptodate(page);
3278                }
3279
3280                unlock_page(page);
3281
3282                if (rdata->result == 0 ||
3283                    (rdata->result == -EAGAIN && got_bytes))
3284                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3285
3286                got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3287
3288                put_page(page);
3289                rdata->pages[i] = NULL;
3290        }
3291        kref_put(&rdata->refcount, cifs_readdata_release);
3292}
3293
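/*
 * Receive the payload of a readpages response into the locked page
 * cache pages of rdata->pages. A partial tail page is zero-filled, and
 * pages for which no data will arrive are either zeroed and marked
 * uptodate (when they lie beyond the server's EOF) or released back to
 * the LRU.
 */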
3294static int
3295cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3296                        struct cifs_readdata *rdata, unsigned int len)
3297{
3298        int result = 0;
3299        unsigned int i;
3300        u64 eof;
3301        pgoff_t eof_index;
3302        unsigned int nr_pages = rdata->nr_pages;
3303
3304        /* determine the eof that the server (probably) has */
3305        eof = CIFS_I(rdata->mapping->host)->server_eof;
3306        eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3307        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3308
3309        rdata->got_bytes = 0;
3310        rdata->tailsz = PAGE_SIZE;
3311        for (i = 0; i < nr_pages; i++) {
3312                struct page *page = rdata->pages[i];
3313                size_t n = PAGE_SIZE;
3314
3315                if (len >= PAGE_SIZE) {
3316                        len -= PAGE_SIZE;
3317                } else if (len > 0) {
3318                        /* enough for partial page, fill and zero the rest */
3319                        zero_user(page, len, PAGE_SIZE - len);
3320                        n = rdata->tailsz = len;
3321                        len = 0;
3322                } else if (page->index > eof_index) {
3323                        /*
3324                         * The VFS will not try to do readahead past the
3325                         * i_size, but it's possible that we have outstanding
3326                         * writes with gaps in the middle and the i_size hasn't
3327                         * caught up yet. Populate those with zeroed out pages
3328                         * to prevent the VFS from repeatedly attempting to
3329                         * fill them until the writes are flushed.
3330                         */
3331                        zero_user(page, 0, PAGE_SIZE);
3332                        lru_cache_add_file(page);
3333                        flush_dcache_page(page);
3334                        SetPageUptodate(page);
3335                        unlock_page(page);
3336                        put_page(page);
3337                        rdata->pages[i] = NULL;
3338                        rdata->nr_pages--;
3339                        continue;
3340                } else {
3341                        /* no need to hold page hostage */
3342                        lru_cache_add_file(page);
3343                        unlock_page(page);
3344                        put_page(page);
3345                        rdata->pages[i] = NULL;
3346                        rdata->nr_pages--;
3347                        continue;
3348                }
3349
3350                result = cifs_read_page_from_socket(server, page, n);
3351                if (result < 0)
3352                        break;
3353
3354                rdata->got_bytes += result;
3355        }
3356
3357        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3358                                                rdata->got_bytes : result;
3359}
3360
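/*
 * Peel a run of contiguous pages off the tail of @page_list and add
 * them to the page cache, stopping at an index discontinuity or once
 * the batch would exceed @rsize. On return, @offset, @bytes and
 * @nr_pages describe a single contiguous read to issue.
 */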
3361static int
3362readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3363                    unsigned int rsize, struct list_head *tmplist,
3364                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3365{
3366        struct page *page, *tpage;
3367        unsigned int expected_index;
3368        int rc;
3369        gfp_t gfp = readahead_gfp_mask(mapping);
3370
3371        INIT_LIST_HEAD(tmplist);
3372
3373        page = list_entry(page_list->prev, struct page, lru);
3374
3375        /*
3376         * Lock the page and put it in the cache. Since no one else
3377         * should have access to this page, we're safe to simply set
3378         * PG_locked without checking it first.
3379         */
3380        __SetPageLocked(page);
3381        rc = add_to_page_cache_locked(page, mapping,
3382                                      page->index, gfp);
3383
3384        /* give up if we can't stick it in the cache */
3385        if (rc) {
3386                __ClearPageLocked(page);
3387                return rc;
3388        }
3389
3390        /* move first page to the tmplist */
3391        *offset = (loff_t)page->index << PAGE_SHIFT;
3392        *bytes = PAGE_SIZE;
3393        *nr_pages = 1;
3394        list_move_tail(&page->lru, tmplist);
3395
3396        /* now try and add more pages onto the request */
3397        expected_index = page->index + 1;
3398        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3399                /* discontinuity ? */
3400                if (page->index != expected_index)
3401                        break;
3402
3403                /* would this page push the read over the rsize? */
3404                if (*bytes + PAGE_SIZE > rsize)
3405                        break;
3406
3407                __SetPageLocked(page);
3408                if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3409                        __ClearPageLocked(page);
3410                        break;
3411                }
3412                list_move_tail(&page->lru, tmplist);
3413                (*bytes) += PAGE_SIZE;
3414                expected_index++;
3415                (*nr_pages)++;
3416        }
3417        return rc;
3418}
3419
3420static int cifs_readpages(struct file *file, struct address_space *mapping,
3421        struct list_head *page_list, unsigned num_pages)
3422{
3423        int rc;
3424        struct list_head tmplist;
3425        struct cifsFileInfo *open_file = file->private_data;
3426        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3427        struct TCP_Server_Info *server;
3428        pid_t pid;
3429
3430        /*
3431         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3432         * immediately if the cookie is negative
3433         *
3434         * After this point, every page in the list might have PG_fscache set,
3435         * so we will need to clean that up off of every page we don't use.
3436         */
3437        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3438                                         &num_pages);
3439        if (rc == 0)
3440                return rc;
3441
3442        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3443                pid = open_file->pid;
3444        else
3445                pid = current->tgid;
3446
3447        rc = 0;
3448        server = tlink_tcon(open_file->tlink)->ses->server;
3449
3450        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3451                 __func__, file, mapping, num_pages);
3452
3453        /*
3454         * Start with the page at end of list and move it to private
3455         * list. Do the same with any following pages until we hit
3456         * the rsize limit, hit an index discontinuity, or run out of
3457         * pages. Issue the async read and then start the loop again
3458         * until the list is empty.
3459         *
3460         * Note that list order is important. The page_list is in
3461         * the order of declining indexes. When we put the pages in
3462         * the rdata->pages, then we want them in increasing order.
3463         */
3464        while (!list_empty(page_list)) {
3465                unsigned int i, nr_pages, bytes, rsize;
3466                loff_t offset;
3467                struct page *page, *tpage;
3468                struct cifs_readdata *rdata;
3469                unsigned credits;
3470
3471                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3472                                                   &rsize, &credits);
3473                if (rc)
3474                        break;
3475
3476                /*
3477                 * Give up immediately if rsize is too small to read an entire
3478                 * page. The VFS will fall back to readpage. We should never
3479                 * reach this point however since we set ra_pages to 0 when the
3480                 * rsize is smaller than a cache page.
3481                 */
3482                if (unlikely(rsize < PAGE_SIZE)) {
3483                        add_credits_and_wake_if(server, credits, 0);
3484                        return 0;
3485                }
3486
3487                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3488                                         &nr_pages, &offset, &bytes);
3489                if (rc) {
3490                        add_credits_and_wake_if(server, credits, 0);
3491                        break;
3492                }
3493
3494                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3495                if (!rdata) {
3496                        /* best to give up if we're out of mem */
3497                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3498                                list_del(&page->lru);
3499                                lru_cache_add_file(page);
3500                                unlock_page(page);
3501                                put_page(page);
3502                        }
3503                        rc = -ENOMEM;
3504                        add_credits_and_wake_if(server, credits, 0);
3505                        break;
3506                }
3507
3508                rdata->cfile = cifsFileInfo_get(open_file);
3509                rdata->mapping = mapping;
3510                rdata->offset = offset;
3511                rdata->bytes = bytes;
3512                rdata->pid = pid;
3513                rdata->pagesz = PAGE_SIZE;
3514                rdata->read_into_pages = cifs_readpages_read_into_pages;
3515                rdata->credits = credits;
3516
3517                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3518                        list_del(&page->lru);
3519                        rdata->pages[rdata->nr_pages++] = page;
3520                }
3521
3522                if (!rdata->cfile->invalidHandle ||
3523                    !cifs_reopen_file(rdata->cfile, true))
3524                        rc = server->ops->async_readv(rdata);
3525                if (rc) {
3526                        add_credits_and_wake_if(server, rdata->credits, 0);
3527                        for (i = 0; i < rdata->nr_pages; i++) {
3528                                page = rdata->pages[i];
3529                                lru_cache_add_file(page);
3530                                unlock_page(page);
3531                                put_page(page);
3532                        }
3533                        /* Fallback to the readpage in error/reconnect cases */
3534                        kref_put(&rdata->refcount, cifs_readdata_release);
3535                        break;
3536                }
3537
3538                kref_put(&rdata->refcount, cifs_readdata_release);
3539        }
3540
3541        /* Any pages that have been shown to fscache but didn't get added to
3542         * the pagecache must be uncached before they get returned to the
3543         * allocator.
3544         */
3545        cifs_fscache_readpages_cancel(mapping->host, page_list);
3546        return rc;
3547}
3548
3549/*
3550 * cifs_readpage_worker must be called with the page pinned
3551 */
3552static int cifs_readpage_worker(struct file *file, struct page *page,
3553        loff_t *poffset)
3554{
3555        char *read_data;
3556        int rc;
3557
3558        /* Is the page cached? */
3559        rc = cifs_readpage_from_fscache(file_inode(file), page);
3560        if (rc == 0)
3561                goto read_complete;
3562
3563        read_data = kmap(page);
        /* for reads over a certain size we could initiate async read-ahead */
3565
3566        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3567
3568        if (rc < 0)
3569                goto io_error;
3570        else
3571                cifs_dbg(FYI, "Bytes read %d\n", rc);
3572
3573        file_inode(file)->i_atime =
3574                current_fs_time(file_inode(file)->i_sb);
3575
3576        if (PAGE_SIZE > rc)
3577                memset(read_data + rc, 0, PAGE_SIZE - rc);
3578
3579        flush_dcache_page(page);
3580        SetPageUptodate(page);
3581
3582        /* send this page to the cache */
3583        cifs_readpage_to_fscache(file_inode(file), page);
3584
3585        rc = 0;
3586
3587io_error:
3588        kunmap(page);
3589        unlock_page(page);
3590
3591read_complete:
3592        return rc;
3593}
3594
3595static int cifs_readpage(struct file *file, struct page *page)
3596{
3597        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3598        int rc = -EACCES;
3599        unsigned int xid;
3600
3601        xid = get_xid();
3602
3603        if (file->private_data == NULL) {
3604                rc = -EBADF;
3605                free_xid(xid);
3606                return rc;
3607        }
3608
3609        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3610                 page, (int)offset, (int)offset);
3611
3612        rc = cifs_readpage_worker(file, page, &offset);
3613
3614        free_xid(xid);
3615        return rc;
3616}
3617
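/* return 1 if any open handle on the inode grants write access, else 0 */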
3618static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3619{
3620        struct cifsFileInfo *open_file;
3621
3622        spin_lock(&cifs_file_list_lock);
3623        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3624                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3625                        spin_unlock(&cifs_file_list_lock);
3626                        return 1;
3627                }
3628        }
3629        spin_unlock(&cifs_file_list_lock);
3630        return 0;
3631}
3632
/*
 * We do not want to update the file size from the server for inodes open
 * for write - to avoid races with writepage extending the file. In the
 * future we could consider allowing refreshing the inode only on increases
 * in the file size, but this is tricky to do without racing with
 * writebehind page caching in the current Linux kernel design.
 */
3639bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3640{
3641        if (!cifsInode)
3642                return true;
3643
3644        if (is_inode_writable(cifsInode)) {
3645                /* This inode is open for write at least once */
3646                struct cifs_sb_info *cifs_sb;
3647
3648                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /*
                         * since there is no page cache to corrupt on
                         * directio, we can change the size safely
                         */
                        return true;
                }
3654
3655                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3656                        return true;
3657
3658                return false;
3659        } else
3660                return true;
3661}
3662
3663static int cifs_write_begin(struct file *file, struct address_space *mapping,
3664                        loff_t pos, unsigned len, unsigned flags,
3665                        struct page **pagep, void **fsdata)
3666{
3667        int oncethru = 0;
3668        pgoff_t index = pos >> PAGE_SHIFT;
3669        loff_t offset = pos & (PAGE_SIZE - 1);
3670        loff_t page_start = pos & PAGE_MASK;
3671        loff_t i_size;
3672        struct page *page;
3673        int rc = 0;
3674
3675        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3676
3677start:
3678        page = grab_cache_page_write_begin(mapping, index, flags);
3679        if (!page) {
3680                rc = -ENOMEM;
3681                goto out;
3682        }
3683
3684        if (PageUptodate(page))
3685                goto out;
3686
3687        /*
3688         * If we write a full page it will be up to date, no need to read from
3689         * the server. If the write is short, we'll end up doing a sync write
3690         * instead.
3691         */
3692        if (len == PAGE_SIZE)
3693                goto out;
3694
3695        /*
3696         * optimize away the read when we have an oplock, and we're not
3697         * expecting to use any of the data we'd be reading in. That
3698         * is, when the page lies beyond the EOF, or straddles the EOF
3699         * and the write will cover all of the existing data.
3700         */
3701        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3702                i_size = i_size_read(mapping->host);
3703                if (page_start >= i_size ||
3704                    (offset == 0 && (pos + len) >= i_size)) {
3705                        zero_user_segments(page, 0, offset,
3706                                           offset + len,
3707                                           PAGE_SIZE);
3708                        /*
3709                         * PageChecked means that the parts of the page
3710                         * to which we're not writing are considered up
3711                         * to date. Once the data is copied to the
3712                         * page, it can be set uptodate.
3713                         */
3714                        SetPageChecked(page);
3715                        goto out;
3716                }
3717        }
3718
3719        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3720                /*
3721                 * might as well read a page, it is fast enough. If we get
3722                 * an error, we don't need to return it. cifs_write_end will
3723                 * do a sync write instead since PG_uptodate isn't set.
3724                 */
3725                cifs_readpage_worker(file, page, &page_start);
3726                put_page(page);
3727                oncethru = 1;
3728                goto start;
        } else {
                /*
                 * We could try using another file handle if there is one -
                 * but how would we lock it to prevent a close of that handle
                 * racing with this read? In any case this page will be
                 * written out by write_end, so it is fine.
                 */
        }
3735out:
3736        *pagep = page;
3737        return rc;
3738}
3739
3740static int cifs_release_page(struct page *page, gfp_t gfp)
3741{
3742        if (PagePrivate(page))
3743                return 0;
3744
3745        return cifs_fscache_release_page(page, gfp);
3746}
3747
3748static void cifs_invalidate_page(struct page *page, unsigned int offset,
3749                                 unsigned int length)
3750{
3751        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3752
3753        if (offset == 0 && length == PAGE_SIZE)
3754                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3755}
3756
3757static int cifs_launder_page(struct page *page)
3758{
3759        int rc = 0;
3760        loff_t range_start = page_offset(page);
3761        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
3762        struct writeback_control wbc = {
3763                .sync_mode = WB_SYNC_ALL,
3764                .nr_to_write = 0,
3765                .range_start = range_start,
3766                .range_end = range_end,
3767        };
3768
3769        cifs_dbg(FYI, "Launder page: %p\n", page);
3770
3771        if (clear_page_dirty_for_io(page))
3772                rc = cifs_writepage_locked(page, &wbc);
3773
3774        cifs_fscache_invalidate_page(page, page->mapping->host);
3775        return rc;
3776}
3777
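/*
 * Work handler run when the server sends an oplock break: downgrade the
 * cached oplock state, flush (and, if read caching is lost, invalidate)
 * the inode's page cache, re-push byte-range locks, and acknowledge the
 * break to the server unless it was cancelled by a reconnect.
 */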
3778void cifs_oplock_break(struct work_struct *work)
3779{
3780        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3781                                                  oplock_break);
3782        struct inode *inode = d_inode(cfile->dentry);
3783        struct cifsInodeInfo *cinode = CIFS_I(inode);
3784        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3785        struct TCP_Server_Info *server = tcon->ses->server;
3786        int rc = 0;
3787
3788        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3789                        TASK_UNINTERRUPTIBLE);
3790
3791        server->ops->downgrade_oplock(server, cinode,
3792                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3793
3794        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3795                                                cifs_has_mand_locks(cinode)) {
3796                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3797                         inode);
3798                cinode->oplock = 0;
3799        }
3800
3801        if (inode && S_ISREG(inode->i_mode)) {
3802                if (CIFS_CACHE_READ(cinode))
3803                        break_lease(inode, O_RDONLY);
3804                else
3805                        break_lease(inode, O_WRONLY);
3806                rc = filemap_fdatawrite(inode->i_mapping);
3807                if (!CIFS_CACHE_READ(cinode)) {
3808                        rc = filemap_fdatawait(inode->i_mapping);
3809                        mapping_set_error(inode->i_mapping, rc);
3810                        cifs_zap_mapping(inode);
3811                }
3812                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3813        }
3814
3815        rc = cifs_push_locks(cfile);
3816        if (rc)
3817                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3818
        /*
         * Releasing a stale oplock after a recent reconnect of the smb
         * session using a now incorrect file handle is not a data integrity
         * issue, but we do not bother sending an oplock release if the
         * session to the server is still disconnected, since the oplock
         * has already been released by the server in that case.
         */
3825        if (!cfile->oplock_break_cancelled) {
3826                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3827                                                             cinode);
3828                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3829        }
3830        cifs_done_oplock_break(cinode);
3831}
3832
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() O_DIRECT flags which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
3842static ssize_t
3843cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
3844{
3845        /*
3846         * FIXME
3847         * Eventually need to support direct IO for non forcedirectio mounts
3848         */
3849        return -EINVAL;
3850}
3851
3852
3853const struct address_space_operations cifs_addr_ops = {
3854        .readpage = cifs_readpage,
3855        .readpages = cifs_readpages,
3856        .writepage = cifs_writepage,
3857        .writepages = cifs_writepages,
3858        .write_begin = cifs_write_begin,
3859        .write_end = cifs_write_end,
3860        .set_page_dirty = __set_page_dirty_nobuffers,
3861        .releasepage = cifs_release_page,
3862        .direct_IO = cifs_direct_io,
3863        .invalidatepage = cifs_invalidate_page,
3864        .launder_page = cifs_launder_page,
3865};
3866
3867/*
3868 * cifs_readpages requires the server to support a buffer large enough to
3869 * contain the header plus one complete page of data.  Otherwise, we need
3870 * to leave cifs_readpages out of the address space operations.
3871 */
3872const struct address_space_operations cifs_addr_ops_smallbuf = {
3873        .readpage = cifs_readpage,
3874        .writepage = cifs_writepage,
3875        .writepages = cifs_writepages,
3876        .write_begin = cifs_write_begin,
3877        .write_end = cifs_write_end,
3878        .set_page_dirty = __set_page_dirty_nobuffers,
3879        .releasepage = cifs_release_page,
3880        .invalidatepage = cifs_invalidate_page,
3881        .launder_page = cifs_launder_page,
3882};
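
/*
 * For illustration only (a simplified sketch; the actual selection is
 * done when the inode's ops are set up, in inode.c): an inode falls
 * back to the small-buffer variant when the server's negotiated buffer
 * cannot hold a header plus one complete page:
 *
 *	if (server->maxBuf < PAGE_SIZE + MAX_CIFS_HDR_SIZE)
 *		inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
 *	else
 *		inode->i_data.a_ops = &cifs_addr_ops;
 */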
3883