linux/fs/cifs/file.c
/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

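/*
 * Map the O_ACCMODE portion of POSIX open flags to the NT desired access
 * bits used in the SMB open request, e.g.:
 *
 *      cifs_convert_flags(O_RDWR) == (GENERIC_READ | GENERIC_WRITE)
 *
 * An unrecognized access mode falls through to a conservative
 * read/write/attribute mask.
 */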
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is more permission than we need to request;
                   asking for it can cause an unnecessary access-denied
                   error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

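/*
 * Map POSIX open flags to the SMB_O_* flags used by the CIFS POSIX
 * extensions. O_DSYNC is conservatively widened to SMB_O_SYNC, and an
 * O_EXCL without O_CREAT is ignored (with a debug message) since it has
 * no defined meaning on open.
 */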
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

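/*
 * Translate POSIX O_CREAT/O_EXCL/O_TRUNC combinations into the SMB create
 * disposition; see the open flag mapping table before cifs_nt_open() below.
 */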
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

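/*
 * Open a file using the CIFS POSIX extensions. On success, *pnetfid and
 * *poplock are filled in from the server response; if the caller passed a
 * pinode, the inode is looked up (or created) from the returned
 * FILE_UNIX_BASIC_INFO, except when the server reports type -1, in which
 * case the caller is expected to do a qpathinfo itself.
 */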
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

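/*
 * Open a file the traditional (non-POSIX) way via the per-dialect
 * server->ops->open hook, then refresh the inode from the returned
 * metadata (or via the unix extensions if the tcon supports them).
 */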
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing
 *      file rather than replacing it with a new file as FILE_SUPERSEDE
 *      does (FILE_SUPERSEDE uses the attributes / metadata passed in
 *      on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

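/*
 * Return true if any fid attached to this inode still holds byte-range
 * (mandatory brlock) locks. Walks cinode->llist under lock_sem.
 */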
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

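/*
 * Allocate and initialize the per-open cifsFileInfo: take references on
 * the dentry, tlink and superblock, hook the new file into the inode and
 * tcon open-file lists, and hand the fid/oplock to the dialect-specific
 * set_fid hook. Returns NULL on allocation failure.
 */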
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

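/*
 * Take an additional reference on the file private data under
 * cifs_file_list_lock; released by cifsFileInfo_put().
 */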
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

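/*
 * ->open() for regular files: try a POSIX-extensions open first when the
 * tcon advertises CIFS_UNIX_POSIX_PATH_OPS_CAP, falling back to
 * cifs_nt_open() on servers where that path is broken or unsupported,
 * then attach the cifsFileInfo and fscache cookie. A pending-open record
 * is registered first so that a lease break arriving while the open is
 * in flight is not missed.
 */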
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

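/*
 * Reopen a file whose handle was invalidated (e.g. by reconnect). Takes
 * fh_mutex to serialize against other reopen attempts, retries the POSIX
 * open path first where available, and, if can_flush is set, writes back
 * dirty pages and refreshes the inode before relocking.
 */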
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means
         * we end up here, and we can never tell if the caller already has
         * the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write behind data that
         * needs to be flushed and the server version of the file size can
         * be stale. If we knew for sure that the inode was not dirty
         * locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data; and since we do not know if
         * we have data that would invalidate the current end of file on
         * the server, we can not go to the server to get the new inode
         * info.
         */

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

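/*
 * ->release() for directories: close an uncompleted readdir handle on
 * the server if one is still open, free the cached search buffer, and
 * drop the tlink reference.
 */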
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

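/*
 * Allocate and initialize a cifsLockInfo for the given byte range, owned
 * by the current thread group. Returns NULL on allocation failure.
 */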
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

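/*
 * Scan every fid's lock list on this inode for a range conflicting with
 * [offset, offset + length); on a hit, optionally return the conflicting
 * lock through conf_lock. Caller must hold lock_sem.
 */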
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

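/*
 * Unconditionally append a lock to this fid's lock list under lock_sem.
 */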
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

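/*
 * Flush all locally cached mandatory brlocks for this fid to the server,
 * in one pass for exclusive locks and one for shared. Ranges are batched
 * into cifs_lockv calls of at most
 *
 *      max_num = (max_buf - sizeof(struct smb_hdr)) /
 *                                      sizeof(LOCKING_ANDX_RANGE)
 *
 * entries each. The last error seen, if any, is returned.
 */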
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

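/* temporary record of a posix lock queued for transmission to the server */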
struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

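/*
 * Flush all cached POSIX (fcntl) locks on this inode to the server.
 * Snapshots the count under flc_lock, preallocates lock_to_push entries
 * outside the spinlock, then copies the lock descriptions and replays
 * them via CIFSSMBPosixLock. The last error seen, if any, is returned.
 */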
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                /* advance to the next preallocated entry */
                el = el->next;
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

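/*
 * Push cached byte-range locks to the server (e.g. after a reconnect),
 * choosing the POSIX or mandatory flavor based on the unix fcntl
 * capability and the nobrl mount option, then clear can_cache_brlcks.
 */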
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

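/*
 * Decode a struct file_lock into the pieces the lock handlers need: the
 * dialect-specific lock type bits, whether this is a lock or an unlock
 * request, and whether the caller is willing to block.
 */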
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

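/*
 * Handle F_GETLK: test for a conflicting lock, first against the local
 * cached locks, then (when required) against the server. For mandatory
 * locking the probe works by trying to take and immediately release the
 * lock on the server, downgrading from exclusive to shared on conflict.
 */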
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

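/*
 * Unlock every cached lock of this fid that falls entirely inside the
 * flock range and belongs to the current thread group. Unlock requests
 * are batched as in cifs_push_mandatory_locks(); locks being unlocked
 * are parked on tmp_llist so they can be restored if the server rejects
 * the batch.
 */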
1374int
1375cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1376                  unsigned int xid)
1377{
1378        int rc = 0, stored_rc;
1379        int types[] = {LOCKING_ANDX_LARGE_FILES,
1380                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1381        unsigned int i;
1382        unsigned int max_num, num, max_buf;
1383        LOCKING_ANDX_RANGE *buf, *cur;
1384        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1385        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1386        struct cifsLockInfo *li, *tmp;
1387        __u64 length = 1 + flock->fl_end - flock->fl_start;
1388        struct list_head tmp_llist;
1389
1390        INIT_LIST_HEAD(&tmp_llist);
1391
1392        /*
1393         * Accessing maxBuf is racy with cifs_reconnect - need to store value
1394         * and check it for zero before using.
1395         */
1396        max_buf = tcon->ses->server->maxBuf;
1397        if (!max_buf)
1398                return -EINVAL;
1399
1400        max_num = (max_buf - sizeof(struct smb_hdr)) /
1401                                                sizeof(LOCKING_ANDX_RANGE);
1402        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1403        if (!buf)
1404                return -ENOMEM;
1405
1406        down_write(&cinode->lock_sem);
1407        for (i = 0; i < 2; i++) {
1408                cur = buf;
1409                num = 0;
1410                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1411                        if (flock->fl_start > li->offset ||
1412                            (flock->fl_start + length) <
1413                            (li->offset + li->length))
1414                                continue;
1415                        if (current->tgid != li->pid)
1416                                continue;
1417                        if (types[i] != li->type)
1418                                continue;
1419                        if (cinode->can_cache_brlcks) {
1420                                /*
1421                                 * We can cache brlock requests - simply remove
1422                                 * a lock from the file's list.
1423                                 */
1424                                list_del(&li->llist);
1425                                cifs_del_lock_waiters(li);
1426                                kfree(li);
1427                                continue;
1428                        }
1429                        cur->Pid = cpu_to_le16(li->pid);
1430                        cur->LengthLow = cpu_to_le32((u32)li->length);
1431                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1432                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
1433                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1434                        /*
1435                         * We need to save the lock here so we can add it back to
1436                         * the file's list if the unlock range request fails on
1437                         * the server.
1438                         */
1439                        list_move(&li->llist, &tmp_llist);
1440                        if (++num == max_num) {
1441                                stored_rc = cifs_lockv(xid, tcon,
1442                                                       cfile->fid.netfid,
1443                                                       li->type, num, 0, buf);
1444                                if (stored_rc) {
1445                                        /*
1446                                         * We failed on the unlock range
1447                                         * request - add all locks from the tmp
1448                                         * list to the head of the file's list.
1449                                         */
1450                                        cifs_move_llist(&tmp_llist,
1451                                                        &cfile->llist->locks);
1452                                        rc = stored_rc;
1453                                } else
1454                                        /*
1455                                         * The unlock range request succeeded -
1456                                         * free the tmp list.
1457                                         */
1458                                        cifs_free_llist(&tmp_llist);
1459                                cur = buf;
1460                                num = 0;
1461                        } else
1462                                cur++;
1463                }
1464                if (num) {
1465                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1466                                               types[i], num, 0, buf);
1467                        if (stored_rc) {
1468                                cifs_move_llist(&tmp_llist,
1469                                                &cfile->llist->locks);
1470                                rc = stored_rc;
1471                        } else
1472                                cifs_free_llist(&tmp_llist);
1473                }
1474        }
1475
1476        up_write(&cinode->lock_sem);
1477        kfree(buf);
1478        return rc;
1479}
1480
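    /*
     * Set or clear a single byte-range lock. On POSIX-capable mounts the
     * request is applied locally and then sent via CIFSSMBPosixLock().
     * Otherwise cifs_lock_add_if() either caches the lock locally
     * (rc == 0) or tells us to send a mandatory lock to the server;
     * unlock requests go through the mand_unlock_range operation.
     */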
1481static int
1482cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1483           bool wait_flag, bool posix_lck, int lock, int unlock,
1484           unsigned int xid)
1485{
1486        int rc = 0;
1487        __u64 length = 1 + flock->fl_end - flock->fl_start;
1488        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1489        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1490        struct TCP_Server_Info *server = tcon->ses->server;
1491        struct inode *inode = cfile->dentry->d_inode;
1492
1493        if (posix_lck) {
1494                int posix_lock_type;
1495
1496                rc = cifs_posix_lock_set(file, flock);
1497                if (rc <= 0)
1498                        return rc;
1499
1500                if (type & server->vals->shared_lock_type)
1501                        posix_lock_type = CIFS_RDLCK;
1502                else
1503                        posix_lock_type = CIFS_WRLCK;
1504
1505                if (unlock == 1)
1506                        posix_lock_type = CIFS_UNLCK;
1507
1508                rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1509                                      current->tgid, flock->fl_start, length,
1510                                      NULL, posix_lock_type, wait_flag);
1511                goto out;
1512        }
1513
1514        if (lock) {
1515                struct cifsLockInfo *lock;
1516
1517                lock = cifs_lock_init(flock->fl_start, length, type);
1518                if (!lock)
1519                        return -ENOMEM;
1520
1521                rc = cifs_lock_add_if(cfile, lock, wait_flag);
1522                if (rc < 0) {
1523                        kfree(lock);
1524                        return rc;
1525                }
1526                if (!rc)
1527                        goto out;
1528
1529                /*
1530                 * Windows 7 server can delay breaking lease from read to None
1531                 * if we set a byte-range lock on a file - break it explicitly
1532                 * before sending the lock to the server to be sure the next
1533         * read won't conflict with non-overlapping locks due to
1534         * page reading.
1535                 */
1536                if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1537                                        CIFS_CACHE_READ(CIFS_I(inode))) {
1538                        cifs_zap_mapping(inode);
1539                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1540                                 inode);
1541                        CIFS_I(inode)->oplock = 0;
1542                }
1543
1544                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1545                                            type, 1, 0, wait_flag);
1546                if (rc) {
1547                        kfree(lock);
1548                        return rc;
1549                }
1550
1551                cifs_lock_add(cfile, lock);
1552        } else if (unlock)
1553                rc = server->ops->mand_unlock_range(cfile, flock, xid);
1554
1555out:
1556        if (flock->fl_flags & FL_POSIX)
1557                posix_lock_file_wait(file, flock);
1558        return rc;
1559}
1560
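    /*
     * VFS ->lock entry point: decode the request with cifs_read_flock(),
     * then dispatch to cifs_getlk() for F_GETLK requests and cifs_setlk()
     * for everything else.
     */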
1561int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1562{
1563        int rc, xid;
1564        int lock = 0, unlock = 0;
1565        bool wait_flag = false;
1566        bool posix_lck = false;
1567        struct cifs_sb_info *cifs_sb;
1568        struct cifs_tcon *tcon;
1569        struct cifsInodeInfo *cinode;
1570        struct cifsFileInfo *cfile;
1571        __u16 netfid;
1572        __u32 type;
1573
1574        rc = -EACCES;
1575        xid = get_xid();
1576
1577        cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1578                 cmd, flock->fl_flags, flock->fl_type,
1579                 flock->fl_start, flock->fl_end);
1580
1581        cfile = (struct cifsFileInfo *)file->private_data;
1582        tcon = tlink_tcon(cfile->tlink);
1583
1584        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1585                        tcon->ses->server);
1586
1587        cifs_sb = CIFS_FILE_SB(file);
1588        netfid = cfile->fid.netfid;
1589        cinode = CIFS_I(file_inode(file));
1590
1591        if (cap_unix(tcon->ses) &&
1592            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1593            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1594                posix_lck = true;
1595        /*
1596         * BB add code here to normalize offset and length to account for
1597         * negative length, which we cannot accept over the wire.
1598         */
1599        if (IS_GETLK(cmd)) {
1600                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1601                free_xid(xid);
1602                return rc;
1603        }
1604
1605        if (!lock && !unlock) {
1606                /*
1607                 * if this is neither a lock nor an unlock request, there is
1608                 * nothing to do since we do not know what it is
1609                 */
1610                free_xid(xid);
1611                return -EOPNOTSUPP;
1612        }
1613
1614        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1615                        xid);
1616        free_xid(xid);
1617        return rc;
1618}
1619
1620/*
1621 * update the file size (if needed) after a write. Should be called with
1622 * the inode->i_lock held
1623 */
1624void
1625cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1626                      unsigned int bytes_written)
1627{
1628        loff_t end_of_write = offset + bytes_written;
1629
1630        if (end_of_write > cifsi->server_eof)
1631                cifsi->server_eof = end_of_write;
1632}
1633
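    /*
     * Synchronously write @write_size bytes at *@offset through the
     * server's sync_write operation, reopening an invalidated handle and
     * retrying on -EAGAIN, with each request capped at wp_retry_size().
     * On success *@offset is advanced and the cached server EOF and
     * i_size are updated.
     */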
1634static ssize_t
1635cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1636           size_t write_size, loff_t *offset)
1637{
1638        int rc = 0;
1639        unsigned int bytes_written = 0;
1640        unsigned int total_written;
1641        struct cifs_sb_info *cifs_sb;
1642        struct cifs_tcon *tcon;
1643        struct TCP_Server_Info *server;
1644        unsigned int xid;
1645        struct dentry *dentry = open_file->dentry;
1646        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1647        struct cifs_io_parms io_parms;
1648
1649        cifs_sb = CIFS_SB(dentry->d_sb);
1650
1651        cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1652                 write_size, *offset, dentry);
1653
1654        tcon = tlink_tcon(open_file->tlink);
1655        server = tcon->ses->server;
1656
1657        if (!server->ops->sync_write)
1658                return -ENOSYS;
1659
1660        xid = get_xid();
1661
1662        for (total_written = 0; write_size > total_written;
1663             total_written += bytes_written) {
1664                rc = -EAGAIN;
1665                while (rc == -EAGAIN) {
1666                        struct kvec iov[2];
1667                        unsigned int len;
1668
1669                        if (open_file->invalidHandle) {
1670                                /* we could deadlock if we called
1671                                   filemap_fdatawait from here so tell
1672                                   reopen_file not to flush data to
1673                                   server now */
1674                                rc = cifs_reopen_file(open_file, false);
1675                                if (rc != 0)
1676                                        break;
1677                        }
1678
1679                        len = min(server->ops->wp_retry_size(dentry->d_inode),
1680                                  (unsigned int)write_size - total_written);
1681                        /* iov[0] is reserved for smb header */
1682                        iov[1].iov_base = (char *)write_data + total_written;
1683                        iov[1].iov_len = len;
1684                        io_parms.pid = pid;
1685                        io_parms.tcon = tcon;
1686                        io_parms.offset = *offset;
1687                        io_parms.length = len;
1688                        rc = server->ops->sync_write(xid, &open_file->fid,
1689                                        &io_parms, &bytes_written, iov, 1);
1690                }
1691                if (rc || (bytes_written == 0)) {
1692                        if (total_written)
1693                                break;
1694                        else {
1695                                free_xid(xid);
1696                                return rc;
1697                        }
1698                } else {
1699                        spin_lock(&dentry->d_inode->i_lock);
1700                        cifs_update_eof(cifsi, *offset, bytes_written);
1701                        spin_unlock(&dentry->d_inode->i_lock);
1702                        *offset += bytes_written;
1703                }
1704        }
1705
1706        cifs_stats_bytes_written(tcon, total_written);
1707
1708        if (total_written > 0) {
1709                spin_lock(&dentry->d_inode->i_lock);
1710                if (*offset > dentry->d_inode->i_size)
1711                        i_size_write(dentry->d_inode, *offset);
1712                spin_unlock(&dentry->d_inode->i_lock);
1713        }
1714        mark_inode_dirty_sync(dentry->d_inode);
1715        free_xid(xid);
1716        return total_written;
1717}
1718
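    /*
     * Find an open handle on this inode usable for reading, skipping
     * handles that have been invalidated (e.g. after a reconnect). The
     * returned handle carries an extra reference; returns NULL if no
     * suitable handle exists.
     */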
1719struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1720                                        bool fsuid_only)
1721{
1722        struct cifsFileInfo *open_file = NULL;
1723        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1724
1725        /* only filter by fsuid on multiuser mounts */
1726        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1727                fsuid_only = false;
1728
1729        spin_lock(&cifs_file_list_lock);
1730        /* we could simply return the first list entry since write-only
1731           entries are always at the end of the list, but the first entry
1732           might have a close pending, so we walk the whole list */
1733        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1734                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1735                        continue;
1736                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1737                        if (!open_file->invalidHandle) {
1738                                /* found a good file */
1739                                /* lock it so it will not be closed on us */
1740                                cifsFileInfo_get_locked(open_file);
1741                                spin_unlock(&cifs_file_list_lock);
1742                                return open_file;
1743                        } /* else might as well continue, and look for
1744                             another, or simply have the caller reopen it
1745                             again rather than trying to fix this handle */
1746                } else /* write only file */
1747                        break; /* write only files are last so must be done */
1748        }
1749        spin_unlock(&cifs_file_list_lock);
1750        return NULL;
1751}
1752
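    /*
     * Find an open handle usable for writing, preferring one opened by
     * the calling thread group. If only invalidated handles remain, try
     * to reopen one, giving up after MAX_REOPEN_ATT attempts. The caller
     * must drop the returned reference with cifsFileInfo_put().
     */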
1753struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1754                                        bool fsuid_only)
1755{
1756        struct cifsFileInfo *open_file, *inv_file = NULL;
1757        struct cifs_sb_info *cifs_sb;
1758        bool any_available = false;
1759        int rc;
1760        unsigned int refind = 0;
1761
1762        /* Having a null inode here (because mapping->host was set to zero by
1763        the VFS or MM) should not happen, but we had reports of an oops (due to
1764        it being zero) during stress test cases, so we need to check for it */
1765
1766        if (cifs_inode == NULL) {
1767                cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1768                dump_stack();
1769                return NULL;
1770        }
1771
1772        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1773
1774        /* only filter by fsuid on multiuser mounts */
1775        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1776                fsuid_only = false;
1777
1778        spin_lock(&cifs_file_list_lock);
1779refind_writable:
1780        if (refind > MAX_REOPEN_ATT) {
1781                spin_unlock(&cifs_file_list_lock);
1782                return NULL;
1783        }
1784        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1785                if (!any_available && open_file->pid != current->tgid)
1786                        continue;
1787                if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1788                        continue;
1789                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1790                        if (!open_file->invalidHandle) {
1791                                /* found a good writable file */
1792                                cifsFileInfo_get_locked(open_file);
1793                                spin_unlock(&cifs_file_list_lock);
1794                                return open_file;
1795                        } else {
1796                                if (!inv_file)
1797                                        inv_file = open_file;
1798                        }
1799                }
1800        }
1801        /* couldn't find a usable FH with the same pid, try any available */
1802        if (!any_available) {
1803                any_available = true;
1804                goto refind_writable;
1805        }
1806
1807        if (inv_file) {
1808                any_available = false;
1809                cifsFileInfo_get_locked(inv_file);
1810        }
1811
1812        spin_unlock(&cifs_file_list_lock);
1813
1814        if (inv_file) {
1815                rc = cifs_reopen_file(inv_file, false);
1816                if (!rc)
1817                        return inv_file;
1818                else {
1819                        spin_lock(&cifs_file_list_lock);
1820                        list_move_tail(&inv_file->flist,
1821                                        &cifs_inode->openFileList);
1822                        spin_unlock(&cifs_file_list_lock);
1823                        cifsFileInfo_put(inv_file);
1824                        spin_lock(&cifs_file_list_lock);
1825                        ++refind;
1826                        inv_file = NULL;
1827                        goto refind_writable;
1828                }
1829        }
1830
1831        return NULL;
1832}
1833
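    /*
     * Write the byte range [@from, @to) of @page back to the server
     * through any writable handle on the inode. The range is clamped so
     * the write never extends the file, and a page that now lies past
     * EOF (a race with truncate) is silently skipped.
     */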
1834static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1835{
1836        struct address_space *mapping = page->mapping;
1837        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1838        char *write_data;
1839        int rc = -EFAULT;
1840        int bytes_written = 0;
1841        struct inode *inode;
1842        struct cifsFileInfo *open_file;
1843
1844        if (!mapping || !mapping->host)
1845                return -EFAULT;
1846
1847        inode = page->mapping->host;
1848
1849        offset += (loff_t)from;
1850        write_data = kmap(page);
1851        write_data += from;
1852
1853        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1854                kunmap(page);
1855                return -EIO;
1856        }
1857
1858        /* racing with truncate? */
1859        if (offset > mapping->host->i_size) {
1860                kunmap(page);
1861                return 0; /* don't care */
1862        }
1863
1864        /* check to make sure that we are not extending the file */
1865        if (mapping->host->i_size - offset < (loff_t)to)
1866                to = (unsigned)(mapping->host->i_size - offset);
1867
1868        open_file = find_writable_file(CIFS_I(mapping->host), false);
1869        if (open_file) {
1870                bytes_written = cifs_write(open_file, open_file->pid,
1871                                           write_data, to - from, &offset);
1872                cifsFileInfo_put(open_file);
1873                /* Does mm or vfs already set times? */
1874                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1875                if ((bytes_written > 0) && (offset))
1876                        rc = 0;
1877                else if (bytes_written < 0)
1878                        rc = bytes_written;
1879        } else {
1880                cifs_dbg(FYI, "No writable filehandles for inode\n");
1881                rc = -EIO;
1882        }
1883
1884        kunmap(page);
1885        return rc;
1886}
1887
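    /*
     * Allocate a writedata descriptor with room for @tofind pages and
     * fill its page array with up to that many dirty pages starting at
     * *@index, looping over find_get_pages_tag() as explained below.
     */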
1888static struct cifs_writedata *
1889wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1890                          pgoff_t end, pgoff_t *index,
1891                          unsigned int *found_pages)
1892{
1893        unsigned int nr_pages;
1894        struct page **pages;
1895        struct cifs_writedata *wdata;
1896
1897        wdata = cifs_writedata_alloc((unsigned int)tofind,
1898                                     cifs_writev_complete);
1899        if (!wdata)
1900                return NULL;
1901
1902        /*
1903         * find_get_pages_tag seems to return a max of 256 on each
1904         * iteration, so we must call it several times in order to
1905         * fill the array or the wsize is effectively limited to
1906         * 256 * PAGE_CACHE_SIZE.
1907         */
1908        *found_pages = 0;
1909        pages = wdata->pages;
1910        do {
1911                nr_pages = find_get_pages_tag(mapping, index,
1912                                              PAGECACHE_TAG_DIRTY, tofind,
1913                                              pages);
1914                *found_pages += nr_pages;
1915                tofind -= nr_pages;
1916                pages += nr_pages;
1917        } while (nr_pages && tofind && *index <= end);
1918
1919        return wdata;
1920}
1921
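    /*
     * Lock and claim a run of consecutive dirty pages from the
     * @found_pages candidates, marking each one for writeback. The scan
     * stops at the first ineligible page (truncated, beyond @end, not
     * consecutive, or already under writeback); the leftover pages are
     * released. Returns the number of pages ready to be sent.
     */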
1922static unsigned int
1923wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1924                    struct address_space *mapping,
1925                    struct writeback_control *wbc,
1926                    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1927{
1928        unsigned int nr_pages = 0, i;
1929        struct page *page;
1930
1931        for (i = 0; i < found_pages; i++) {
1932                page = wdata->pages[i];
1933                /*
1934                 * At this point we hold neither mapping->tree_lock nor
1935                 * lock on the page itself: the page may be truncated or
1936                 * invalidated (changing page->mapping to NULL), or even
1937                 * swizzled back from swapper_space to tmpfs file
1938                 * mapping
1939                 */
1940
1941                if (nr_pages == 0)
1942                        lock_page(page);
1943                else if (!trylock_page(page))
1944                        break;
1945
1946                if (unlikely(page->mapping != mapping)) {
1947                        unlock_page(page);
1948                        break;
1949                }
1950
1951                if (!wbc->range_cyclic && page->index > end) {
1952                        *done = true;
1953                        unlock_page(page);
1954                        break;
1955                }
1956
1957                if (*next && (page->index != *next)) {
1958                        /* Not next consecutive page */
1959                        unlock_page(page);
1960                        break;
1961                }
1962
1963                if (wbc->sync_mode != WB_SYNC_NONE)
1964                        wait_on_page_writeback(page);
1965
1966                if (PageWriteback(page) ||
1967                                !clear_page_dirty_for_io(page)) {
1968                        unlock_page(page);
1969                        break;
1970                }
1971
1972                /*
1973                 * This actually clears the dirty bit in the radix tree.
1974                 * See cifs_writepage() for more commentary.
1975                 */
1976                set_page_writeback(page);
1977                if (page_offset(page) >= i_size_read(mapping->host)) {
1978                        *done = true;
1979                        unlock_page(page);
1980                        end_page_writeback(page);
1981                        break;
1982                }
1983
1984                wdata->pages[i] = page;
1985                *next = page->index + 1;
1986                ++nr_pages;
1987        }
1988
1989        /* reset index to refind any pages skipped */
1990        if (nr_pages == 0)
1991                *index = wdata->pages[0]->index + 1;
1992
1993        /* put any pages we aren't going to use */
1994        for (i = nr_pages; i < found_pages; i++) {
1995                page_cache_release(wdata->pages[i]);
1996                wdata->pages[i] = NULL;
1997        }
1998
1999        return nr_pages;
2000}
2001
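    /*
     * Fill in the remaining writedata fields (offset, page and tail
     * sizes) and pass the request to the server's async_writev operation
     * using a freshly looked-up writable handle. Every claimed page is
     * unlocked before returning, whether or not the send succeeded.
     */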
2002static int
2003wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2004                 struct address_space *mapping, struct writeback_control *wbc)
2005{
2006        int rc = 0;
2007        struct TCP_Server_Info *server;
2008        unsigned int i;
2009
2010        wdata->sync_mode = wbc->sync_mode;
2011        wdata->nr_pages = nr_pages;
2012        wdata->offset = page_offset(wdata->pages[0]);
2013        wdata->pagesz = PAGE_CACHE_SIZE;
2014        wdata->tailsz = min(i_size_read(mapping->host) -
2015                        page_offset(wdata->pages[nr_pages - 1]),
2016                        (loff_t)PAGE_CACHE_SIZE);
2017        wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
2018
2019        if (wdata->cfile != NULL)
2020                cifsFileInfo_put(wdata->cfile);
2021        wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2022        if (!wdata->cfile) {
2023                cifs_dbg(VFS, "No writable handles for inode\n");
2024                rc = -EBADF;
2025        } else {
2026                wdata->pid = wdata->cfile->pid;
2027                server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2028                rc = server->ops->async_writev(wdata, cifs_writedata_release);
2029        }
2030
2031        for (i = 0; i < nr_pages; ++i)
2032                unlock_page(wdata->pages[i]);
2033
2034        return rc;
2035}
2036
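    /*
     * address_space ->writepages: walk the dirty pages in wsize-sized
     * batches, reserving send credits for each batch, and fall back to
     * generic_writepages() when wsize is smaller than a page.
     */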
2037static int cifs_writepages(struct address_space *mapping,
2038                           struct writeback_control *wbc)
2039{
2040        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2041        struct TCP_Server_Info *server;
2042        bool done = false, scanned = false, range_whole = false;
2043        pgoff_t end, index;
2044        struct cifs_writedata *wdata;
2045        int rc = 0;
2046
2047        /*
2048         * If wsize is smaller than the page cache size, default to writing
2049         * one page at a time via cifs_writepage
2050         */
2051        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
2052                return generic_writepages(mapping, wbc);
2053
2054        if (wbc->range_cyclic) {
2055                index = mapping->writeback_index; /* Start from prev offset */
2056                end = -1;
2057        } else {
2058                index = wbc->range_start >> PAGE_CACHE_SHIFT;
2059                end = wbc->range_end >> PAGE_CACHE_SHIFT;
2060                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2061                        range_whole = true;
2062                scanned = true;
2063        }
2064        server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2065retry:
2066        while (!done && index <= end) {
2067                unsigned int i, nr_pages, found_pages, wsize, credits;
2068                pgoff_t next = 0, tofind, saved_index = index;
2069
2070                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2071                                                   &wsize, &credits);
2072                if (rc)
2073                        break;
2074
2075                tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
2076
2077                wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2078                                                  &found_pages);
2079                if (!wdata) {
2080                        rc = -ENOMEM;
2081                        add_credits_and_wake_if(server, credits, 0);
2082                        break;
2083                }
2084
2085                if (found_pages == 0) {
2086                        kref_put(&wdata->refcount, cifs_writedata_release);
2087                        add_credits_and_wake_if(server, credits, 0);
2088                        break;
2089                }
2090
2091                nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2092                                               end, &index, &next, &done);
2093
2094                /* nothing to write? */
2095                if (nr_pages == 0) {
2096                        kref_put(&wdata->refcount, cifs_writedata_release);
2097                        add_credits_and_wake_if(server, credits, 0);
2098                        continue;
2099                }
2100
2101                wdata->credits = credits;
2102
2103                rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2104
2105                /* send failure -- clean up the mess */
2106                if (rc != 0) {
2107                        add_credits_and_wake_if(server, wdata->credits, 0);
2108                        for (i = 0; i < nr_pages; ++i) {
2109                                if (rc == -EAGAIN)
2110                                        redirty_page_for_writepage(wbc,
2111                                                           wdata->pages[i]);
2112                                else
2113                                        SetPageError(wdata->pages[i]);
2114                                end_page_writeback(wdata->pages[i]);
2115                                page_cache_release(wdata->pages[i]);
2116                        }
2117                        if (rc != -EAGAIN)
2118                                mapping_set_error(mapping, rc);
2119                }
2120                kref_put(&wdata->refcount, cifs_writedata_release);
2121
2122                if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2123                        index = saved_index;
2124                        continue;
2125                }
2126
2127                wbc->nr_to_write -= nr_pages;
2128                if (wbc->nr_to_write <= 0)
2129                        done = true;
2130
2131                index = next;
2132        }
2133
2134        if (!scanned && !done) {
2135                /*
2136                 * We hit the last page and there is more work to be done: wrap
2137                 * back to the start of the file
2138                 */
2139                scanned = true;
2140                index = 0;
2141                goto retry;
2142        }
2143
2144        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2145                mapping->writeback_index = index;
2146
2147        return rc;
2148}
2149
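    /* write a single locked page back to the server */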
2150static int
2151cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2152{
2153        int rc;
2154        unsigned int xid;
2155
2156        xid = get_xid();
2157/* BB add check for wbc flags */
2158        page_cache_get(page);
2159        if (!PageUptodate(page))
2160                cifs_dbg(FYI, "ppw - page not up to date\n");
2161
2162        /*
2163         * Set the "writeback" flag, and clear "dirty" in the radix tree.
2164         *
2165         * A writepage() implementation always needs to do either this,
2166         * or re-dirty the page with "redirty_page_for_writepage()" in
2167         * the case of a failure.
2168         *
2169         * Just unlocking the page will cause the radix tree tag-bits
2170         * to fail to update with the state of the page correctly.
2171         */
2172        set_page_writeback(page);
2173retry_write:
2174        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2175        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2176                goto retry_write;
2177        else if (rc == -EAGAIN)
2178                redirty_page_for_writepage(wbc, page);
2179        else if (rc != 0)
2180                SetPageError(page);
2181        else
2182                SetPageUptodate(page);
2183        end_page_writeback(page);
2184        page_cache_release(page);
2185        free_xid(xid);
2186        return rc;
2187}
2188
2189static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2190{
2191        int rc = cifs_writepage_locked(page, wbc);
2192        unlock_page(page);
2193        return rc;
2194}
2195
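    /*
     * ->write_end: mark the copied data up to date, or - when the page
     * is not uptodate - push just the copied bytes straight to the
     * server with cifs_write(), then update i_size and release the page.
     */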
2196static int cifs_write_end(struct file *file, struct address_space *mapping,
2197                        loff_t pos, unsigned len, unsigned copied,
2198                        struct page *page, void *fsdata)
2199{
2200        int rc;
2201        struct inode *inode = mapping->host;
2202        struct cifsFileInfo *cfile = file->private_data;
2203        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2204        __u32 pid;
2205
2206        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2207                pid = cfile->pid;
2208        else
2209                pid = current->tgid;
2210
2211        cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2212                 page, pos, copied);
2213
2214        if (PageChecked(page)) {
2215                if (copied == len)
2216                        SetPageUptodate(page);
2217                ClearPageChecked(page);
2218        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2219                SetPageUptodate(page);
2220
2221        if (!PageUptodate(page)) {
2222                char *page_data;
2223                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2224                unsigned int xid;
2225
2226                xid = get_xid();
2227                /* this is probably better than directly calling
2228                   partialpage_write since here the file handle is known,
2229                   which we might as well leverage */
2230                /* BB check if anything else missing out of ppw
2231                   such as updating last write time */
2232                page_data = kmap(page);
2233                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2234                /* if (rc < 0) should we set writebehind rc? */
2235                kunmap(page);
2236
2237                free_xid(xid);
2238        } else {
2239                rc = copied;
2240                pos += copied;
2241                set_page_dirty(page);
2242        }
2243
2244        if (rc > 0) {
2245                spin_lock(&inode->i_lock);
2246                if (pos > inode->i_size)
2247                        i_size_write(inode, pos);
2248                spin_unlock(&inode->i_lock);
2249        }
2250
2251        unlock_page(page);
2252        page_cache_release(page);
2253
2254        return rc;
2255}
2256
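    /*
     * fsync for strict cache mode: flush dirty pages, invalidate the
     * page cache if we no longer hold a read lease on the file, then ask
     * the server to flush its copy through the flush operation.
     */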
2257int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2258                      int datasync)
2259{
2260        unsigned int xid;
2261        int rc = 0;
2262        struct cifs_tcon *tcon;
2263        struct TCP_Server_Info *server;
2264        struct cifsFileInfo *smbfile = file->private_data;
2265        struct inode *inode = file_inode(file);
2266        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2267
2268        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2269        if (rc)
2270                return rc;
2271        mutex_lock(&inode->i_mutex);
2272
2273        xid = get_xid();
2274
2275        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2276                 file, datasync);
2277
2278        if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2279                rc = cifs_zap_mapping(inode);
2280                if (rc) {
2281                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2282                        rc = 0; /* don't care about it in fsync */
2283                }
2284        }
2285
2286        tcon = tlink_tcon(smbfile->tlink);
2287        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2288                server = tcon->ses->server;
2289                if (server->ops->flush)
2290                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2291                else
2292                        rc = -ENOSYS;
2293        }
2294
2295        free_xid(xid);
2296        mutex_unlock(&inode->i_mutex);
2297        return rc;
2298}
2299
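    /* fsync without the strict-cache invalidation step */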
2300int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2301{
2302        unsigned int xid;
2303        int rc = 0;
2304        struct cifs_tcon *tcon;
2305        struct TCP_Server_Info *server;
2306        struct cifsFileInfo *smbfile = file->private_data;
2307        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2308        struct inode *inode = file->f_mapping->host;
2309
2310        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2311        if (rc)
2312                return rc;
2313        mutex_lock(&inode->i_mutex);
2314
2315        xid = get_xid();
2316
2317        cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2318                 file, datasync);
2319
2320        tcon = tlink_tcon(smbfile->tlink);
2321        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2322                server = tcon->ses->server;
2323                if (server->ops->flush)
2324                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
2325                else
2326                        rc = -ENOSYS;
2327        }
2328
2329        free_xid(xid);
2330        mutex_unlock(&inode->i_mutex);
2331        return rc;
2332}
2333
2334/*
2335 * As file closes, flush all cached write data for this inode checking
2336 * for write behind errors.
2337 */
2338int cifs_flush(struct file *file, fl_owner_t id)
2339{
2340        struct inode *inode = file_inode(file);
2341        int rc = 0;
2342
2343        if (file->f_mode & FMODE_WRITE)
2344                rc = filemap_write_and_wait(inode->i_mapping);
2345
2346        cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2347
2348        return rc;
2349}
2350
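    /*
     * Allocate @num_pages pages for an uncached write, releasing every
     * page already obtained and returning -ENOMEM on failure.
     */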
2351static int
2352cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2353{
2354        int rc = 0;
2355        unsigned long i;
2356
2357        for (i = 0; i < num_pages; i++) {
2358                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2359                if (!pages[i]) {
2360                        /*
2361                         * save number of pages we have already allocated and
2362                         * return with ENOMEM error
2363                         */
2364                        num_pages = i;
2365                        rc = -ENOMEM;
2366                        break;
2367                }
2368        }
2369
2370        if (rc) {
2371                for (i = 0; i < num_pages; i++)
2372                        put_page(pages[i]);
2373        }
2374        return rc;
2375}
2376
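    /*
     * Number of pages needed for the next uncached write: the chunk is
     * min(len, wsize) bytes, rounded up to whole pages. For example, with
     * 4K pages, wsize = 65536 and len = 70000 give *cur_len = 65536 and
     * 16 pages.
     */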
2377static inline
2378size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2379{
2380        size_t num_pages;
2381        size_t clen;
2382
2383        clen = min_t(const size_t, len, wsize);
2384        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2385
2386        if (cur_len)
2387                *cur_len = clen;
2388
2389        return num_pages;
2390}
2391
2392static void
2393cifs_uncached_writedata_release(struct kref *refcount)
2394{
2395        int i;
2396        struct cifs_writedata *wdata = container_of(refcount,
2397                                        struct cifs_writedata, refcount);
2398
2399        for (i = 0; i < wdata->nr_pages; i++)
2400                put_page(wdata->pages[i]);
2401        cifs_writedata_release(refcount);
2402}
2403
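    /*
     * Work item run when an uncached write finishes: push server_eof
     * forward, grow i_size if needed, and wake the collector waiting on
     * wdata->done.
     */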
2404static void
2405cifs_uncached_writev_complete(struct work_struct *work)
2406{
2407        struct cifs_writedata *wdata = container_of(work,
2408                                        struct cifs_writedata, work);
2409        struct inode *inode = wdata->cfile->dentry->d_inode;
2410        struct cifsInodeInfo *cifsi = CIFS_I(inode);
2411
2412        spin_lock(&inode->i_lock);
2413        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2414        if (cifsi->server_eof > inode->i_size)
2415                i_size_write(inode, cifsi->server_eof);
2416        spin_unlock(&inode->i_lock);
2417
2418        complete(&wdata->done);
2419
2420        kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2421}
2422
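    /*
     * Copy up to *@len bytes from @from into the wdata page array. A
     * short copy stops the loop (most likely an unmapped user address);
     * *@len and *@num_pages are updated to reflect what was actually
     * copied and used.
     */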
2423static int
2424wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2425                      size_t *len, unsigned long *num_pages)
2426{
2427        size_t save_len, copied, bytes, cur_len = *len;
2428        unsigned long i, nr_pages = *num_pages;
2429
2430        save_len = cur_len;
2431        for (i = 0; i < nr_pages; i++) {
2432                bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2433                copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2434                cur_len -= copied;
2435                /*
2436                 * If we didn't copy as much as we expected, then that
2437                 * may mean we trod into an unmapped area. Stop copying
2438                 * at that point. On the next pass through the big
2439                 * loop, we'll likely end up getting a zero-length
2440                 * write and bailing out of it.
2441                 */
2442                if (copied < bytes)
2443                        break;
2444        }
2445        cur_len = save_len - cur_len;
2446        *len = cur_len;
2447
2448        /*
2449         * If we have no data to send, then that probably means that
2450         * the copy above failed altogether. That's most likely because
2451         * the address in the iovec was bogus. Return -EFAULT and let
2452         * the caller free anything we allocated and bail out.
2453         */
2454        if (!cur_len)
2455                return -EFAULT;
2456
2457        /*
2458         * i + 1 now represents the number of pages we actually used in
2459         * the copy phase above.
2460         */
2461        *num_pages = i + 1;
2462        return 0;
2463}
2464
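    /*
     * Carve an uncached write into wsize-sized requests: reserve credits,
     * allocate pages and fill them from @from, then issue each chunk with
     * async_writev, queueing the writedata on @wdata_list for the caller
     * to wait on. On -EAGAIN the iterator is rewound and the same chunk
     * is retried.
     */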
2465static int
2466cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2467                     struct cifsFileInfo *open_file,
2468                     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list)
2469{
2470        int rc = 0;
2471        size_t cur_len;
2472        unsigned long nr_pages, num_pages, i;
2473        struct cifs_writedata *wdata;
2474        struct iov_iter saved_from;
2475        loff_t saved_offset = offset;
2476        pid_t pid;
2477        struct TCP_Server_Info *server;
2478
2479        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2480                pid = open_file->pid;
2481        else
2482                pid = current->tgid;
2483
2484        server = tlink_tcon(open_file->tlink)->ses->server;
2485        memcpy(&saved_from, from, sizeof(struct iov_iter));
2486
2487        do {
2488                unsigned int wsize, credits;
2489
2490                rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2491                                                   &wsize, &credits);
2492                if (rc)
2493                        break;
2494
2495                nr_pages = get_numpages(wsize, len, &cur_len);
2496                wdata = cifs_writedata_alloc(nr_pages,
2497                                             cifs_uncached_writev_complete);
2498                if (!wdata) {
2499                        rc = -ENOMEM;
2500                        add_credits_and_wake_if(server, credits, 0);
2501                        break;
2502                }
2503
2504                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2505                if (rc) {
2506                        kfree(wdata);
2507                        add_credits_and_wake_if(server, credits, 0);
2508                        break;
2509                }
2510
2511                num_pages = nr_pages;
2512                rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2513                if (rc) {
2514                        for (i = 0; i < nr_pages; i++)
2515                                put_page(wdata->pages[i]);
2516                        kfree(wdata);
2517                        add_credits_and_wake_if(server, credits, 0);
2518                        break;
2519                }
2520
2521                /*
2522                 * Bring nr_pages down to the number of pages we actually used,
2523                 * and free any pages that we didn't use.
2524                 */
2525                for ( ; nr_pages > num_pages; nr_pages--)
2526                        put_page(wdata->pages[nr_pages - 1]);
2527
2528                wdata->sync_mode = WB_SYNC_ALL;
2529                wdata->nr_pages = nr_pages;
2530                wdata->offset = (__u64)offset;
2531                wdata->cfile = cifsFileInfo_get(open_file);
2532                wdata->pid = pid;
2533                wdata->bytes = cur_len;
2534                wdata->pagesz = PAGE_SIZE;
2535                wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2536                wdata->credits = credits;
2537
2538                if (!wdata->cfile->invalidHandle ||
2539                    !cifs_reopen_file(wdata->cfile, false))
2540                        rc = server->ops->async_writev(wdata,
2541                                        cifs_uncached_writedata_release);
2542                if (rc) {
2543                        add_credits_and_wake_if(server, wdata->credits, 0);
2544                        kref_put(&wdata->refcount,
2545                                 cifs_uncached_writedata_release);
2546                        if (rc == -EAGAIN) {
2547                                memcpy(from, &saved_from,
2548                                       sizeof(struct iov_iter));
2549                                iov_iter_advance(from, offset - saved_offset);
2550                                continue;
2551                        }
2552                        break;
2553                }
2554
2555                list_add_tail(&wdata->list, wdata_list);
2556                offset += cur_len;
2557                len -= cur_len;
2558        } while (len > 0);
2559
2560        return rc;
2561}
2562
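    /*
     * Top half of an uncached write: send everything out with
     * cifs_write_from_iter(), then collect the completions in order of
     * increasing offset, resending any chunk that fails with -EAGAIN.
     */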
2563static ssize_t
2564cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
2565{
2566        size_t len;
2567        ssize_t total_written = 0;
2568        struct cifsFileInfo *open_file;
2569        struct cifs_tcon *tcon;
2570        struct cifs_sb_info *cifs_sb;
2571        struct cifs_writedata *wdata, *tmp;
2572        struct list_head wdata_list;
2573        struct iov_iter saved_from;
2574        int rc;
2575
2576        len = iov_iter_count(from);
2577        rc = generic_write_checks(file, poffset, &len, 0);
2578        if (rc)
2579                return rc;
2580
2581        if (!len)
2582                return 0;
2583
2584        iov_iter_truncate(from, len);
2585
2586        INIT_LIST_HEAD(&wdata_list);
2587        cifs_sb = CIFS_FILE_SB(file);
2588        open_file = file->private_data;
2589        tcon = tlink_tcon(open_file->tlink);
2590
2591        if (!tcon->ses->server->ops->async_writev)
2592                return -ENOSYS;
2593
2594        memcpy(&saved_from, from, sizeof(struct iov_iter));
2595
2596        rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
2597                                  &wdata_list);
2598
2599        /*
2600         * If at least one write was successfully sent, then discard any rc
2601         * value from the later writes. If a later write succeeds, then
2602         * we'll end up returning whatever was written. If it fails, then
2603         * we'll get a new rc value from that.
2604         */
2605        if (!list_empty(&wdata_list))
2606                rc = 0;
2607
2608        /*
2609         * Wait for and collect replies for any successful sends in order of
2610         * increasing offset. Once an error is hit or we get a fatal signal
2611         * while waiting, then return without waiting for any more replies.
2612         */
2613restart_loop:
2614        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2615                if (!rc) {
2616                        /* FIXME: freezable too? */
2617                        rc = wait_for_completion_killable(&wdata->done);
2618                        if (rc)
2619                                rc = -EINTR;
2620                        else if (wdata->result)
2621                                rc = wdata->result;
2622                        else
2623                                total_written += wdata->bytes;
2624
2625                        /* resend call if it's a retryable error */
2626                        if (rc == -EAGAIN) {
2627                                struct list_head tmp_list;
2628                                struct iov_iter tmp_from;
2629
2630                                INIT_LIST_HEAD(&tmp_list);
2631                                list_del_init(&wdata->list);
2632
2633                                memcpy(&tmp_from, &saved_from,
2634                                       sizeof(struct iov_iter));
2635                                iov_iter_advance(&tmp_from,
2636                                                 wdata->offset - *poffset);
2637
2638                                rc = cifs_write_from_iter(wdata->offset,
2639                                                wdata->bytes, &tmp_from,
2640                                                open_file, cifs_sb, &tmp_list);
2641
2642                                list_splice(&tmp_list, &wdata_list);
2643
2644                                kref_put(&wdata->refcount,
2645                                         cifs_uncached_writedata_release);
2646                                goto restart_loop;
2647                        }
2648                }
2649                list_del_init(&wdata->list);
2650                kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2651        }
2652
2653        if (total_written > 0)
2654                *poffset += total_written;
2655
2656        cifs_stats_bytes_written(tcon, total_written);
2657        return total_written ? total_written : (ssize_t)rc;
2658}
2659
2660ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2661{
2662        ssize_t written;
2663        struct inode *inode;
2664        loff_t pos = iocb->ki_pos;
2665
2666        inode = file_inode(iocb->ki_filp);
2667
2668        /*
2669         * BB - optimize the path when signing is disabled. We can drop this
2670         * extra memory-to-memory copying and use the iovec buffers directly
2671         * when constructing the write request.
2672         */
2673
2674        written = cifs_iovec_write(iocb->ki_filp, from, &pos);
2675        if (written > 0) {
2676                set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
2677                iocb->ki_pos = pos;
2678        }
2679
2680        return written;
2681}
2682
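    /*
     * Cached write path used when mandatory brlock semantics apply: take
     * lock_sem shared so the lock list stays stable, fail with -EACCES on
     * a conflicting lock, and otherwise write through the generic path
     * followed by generic_write_sync().
     */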
2683static ssize_t
2684cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2685{
2686        struct file *file = iocb->ki_filp;
2687        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2688        struct inode *inode = file->f_mapping->host;
2689        struct cifsInodeInfo *cinode = CIFS_I(inode);
2690        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2691        ssize_t rc = -EACCES;
2692        loff_t lock_pos = iocb->ki_pos;
2693
2694        /*
2695         * We need to hold the sem to be sure nobody modifies lock list
2696         * with a brlock that prevents writing.
2697         */
2698        down_read(&cinode->lock_sem);
2699        mutex_lock(&inode->i_mutex);
2700        if (file->f_flags & O_APPEND)
2701                lock_pos = i_size_read(inode);
2702        if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
2703                                     server->vals->exclusive_lock_type, NULL,
2704                                     CIFS_WRITE_OP)) {
2705                rc = __generic_file_write_iter(iocb, from);
2706                mutex_unlock(&inode->i_mutex);
2707
2708                if (rc > 0) {
2709                        ssize_t err;
2710
2711                        err = generic_write_sync(file, iocb->ki_pos - rc, rc);
2712                        if (err < 0)
2713                                rc = err;
2714                }
2715        } else {
2716                mutex_unlock(&inode->i_mutex);
2717        }
2718        up_read(&cinode->lock_sem);
2719        return rc;
2720}
2721
2722ssize_t
2723cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2724{
2725        struct inode *inode = file_inode(iocb->ki_filp);
2726        struct cifsInodeInfo *cinode = CIFS_I(inode);
2727        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2728        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2729                                                iocb->ki_filp->private_data;
2730        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2731        ssize_t written;
2732
2733        written = cifs_get_writer(cinode);
2734        if (written)
2735                return written;
2736
2737        if (CIFS_CACHE_WRITE(cinode)) {
2738                if (cap_unix(tcon->ses) &&
2739                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2740                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2741                        written = generic_file_write_iter(iocb, from);
2742                        goto out;
2743                }
2744                written = cifs_writev(iocb, from);
2745                goto out;
2746        }
2747        /*
2748         * For non-oplocked files in strict cache mode we need to write the data
2749         * to the server exactly from pos to pos+len-1 rather than flush all
2750         * affected pages, because that may cause an error with mandatory locks
2751         * on these pages but not on the region from pos to pos+len-1.
2752         */
2753        written = cifs_user_writev(iocb, from);
2754        if (written > 0 && CIFS_CACHE_READ(cinode)) {
2755                /*
2756                 * Windows 7 server can delay breaking level2 oplock if a write
2757                 * request comes - break it on the client to prevent reading
2758                 * stale data.
2759                 */
2760                cifs_zap_mapping(inode);
2761                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2762                         inode);
2763                cinode->oplock = 0;
2764        }
2765out:
2766        cifs_put_writer(cinode);
2767        return written;
2768}
2769
2770static struct cifs_readdata *
2771cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2772{
2773        struct cifs_readdata *rdata;
2774
2775        rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2776                        GFP_KERNEL);
2777        if (rdata != NULL) {
2778                kref_init(&rdata->refcount);
2779                INIT_LIST_HEAD(&rdata->list);
2780                init_completion(&rdata->done);
2781                INIT_WORK(&rdata->work, complete);
2782        }
2783
2784        return rdata;
2785}
2786
2787void
2788cifs_readdata_release(struct kref *refcount)
2789{
2790        struct cifs_readdata *rdata = container_of(refcount,
2791                                        struct cifs_readdata, refcount);
2792
2793        if (rdata->cfile)
2794                cifsFileInfo_put(rdata->cfile);
2795
2796        kfree(rdata);
2797}
2798
2799static int
2800cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2801{
2802        int rc = 0;
2803        struct page *page;
2804        unsigned int i;
2805
2806        for (i = 0; i < nr_pages; i++) {
2807                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2808                if (!page) {
2809                        rc = -ENOMEM;
2810                        break;
2811                }
2812                rdata->pages[i] = page;
2813        }
2814
2815        if (rc) {
2816                for (i = 0; i < nr_pages; i++) {
2817                        put_page(rdata->pages[i]);
2818                        rdata->pages[i] = NULL;
2819                }
2820        }
2821        return rc;
2822}
2823
2824static void
2825cifs_uncached_readdata_release(struct kref *refcount)
2826{
2827        struct cifs_readdata *rdata = container_of(refcount,
2828                                        struct cifs_readdata, refcount);
2829        unsigned int i;
2830
2831        for (i = 0; i < rdata->nr_pages; i++) {
2832                put_page(rdata->pages[i]);
2833                rdata->pages[i] = NULL;
2834        }
2835        cifs_readdata_release(refcount);
2836}
2837
2838/**
2839 * cifs_readdata_to_iov - copy data from pages in response to an iovec
2840 * @rdata:      the readdata response with list of pages holding data
2841 * @iter:       destination for our data
2842 *
2843 * This function copies data from a list of pages in a readdata response into
2844 * an array of iovecs. It will first calculate where the data should go
2845 * based on the info in the readdata and then copy the data into that spot.
2846 */
2847static int
2848cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2849{
2850        size_t remaining = rdata->got_bytes;
2851        unsigned int i;
2852
2853        for (i = 0; i < rdata->nr_pages; i++) {
2854                struct page *page = rdata->pages[i];
2855                size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2856                size_t written = copy_page_to_iter(page, 0, copy, iter);
2857                remaining -= written;
2858                if (written < copy && iov_iter_count(iter) > 0)
2859                        break;
2860        }
2861        return remaining ? -EFAULT : 0;
2862}
2863
2864static void
2865cifs_uncached_readv_complete(struct work_struct *work)
2866{
2867        struct cifs_readdata *rdata = container_of(work,
2868                                                struct cifs_readdata, work);
2869
2870        complete(&rdata->done);
2871        kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2872}
2873
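    /*
     * Receive up to @len bytes from the server socket into the pages of
     * @rdata, one page at a time. A partially filled tail page is zero
     * padded (and recorded in tailsz); pages beyond the received length
     * are released rather than held hostage.
     */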
2874static int
2875cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2876                        struct cifs_readdata *rdata, unsigned int len)
2877{
2878        int result = 0;
2879        unsigned int i;
2880        unsigned int nr_pages = rdata->nr_pages;
2881        struct kvec iov;
2882
2883        rdata->got_bytes = 0;
2884        rdata->tailsz = PAGE_SIZE;
2885        for (i = 0; i < nr_pages; i++) {
2886                struct page *page = rdata->pages[i];
2887
2888                if (len >= PAGE_SIZE) {
2889                        /* enough data to fill the page */
2890                        iov.iov_base = kmap(page);
2891                        iov.iov_len = PAGE_SIZE;
2892                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2893                                 i, iov.iov_base, iov.iov_len);
2894                        len -= PAGE_SIZE;
2895                } else if (len > 0) {
2896                        /* enough for partial page, fill and zero the rest */
2897                        iov.iov_base = kmap(page);
2898                        iov.iov_len = len;
2899                        cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2900                                 i, iov.iov_base, iov.iov_len);
2901                        memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2902                        rdata->tailsz = len;
2903                        len = 0;
2904                } else {
2905                        /* no need to hold page hostage */
2906                        rdata->pages[i] = NULL;
2907                        rdata->nr_pages--;
2908                        put_page(page);
2909                        continue;
2910                }
2911
2912                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2913                kunmap(page);
2914                if (result < 0)
2915                        break;
2916
2917                rdata->got_bytes += result;
2918        }
2919
2920        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
2921                                                rdata->got_bytes : result;
2922}
2923
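    /*
     * Carve a read of @len bytes at @offset into rsize-sized chunks, each
     * throttled by wait_mtu_credits(), and issue them via async_readv().
     * Successfully sent requests are queued on @rdata_list; a send that
     * fails with -EAGAIN is retried, any other error stops the loop.
     */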
2924static int
2925cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
2926                     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list)
2927{
2928        struct cifs_readdata *rdata;
2929        unsigned int npages, rsize, credits;
2930        size_t cur_len;
2931        int rc;
2932        pid_t pid;
2933        struct TCP_Server_Info *server;
2934
2935        server = tlink_tcon(open_file->tlink)->ses->server;
2936
2937        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2938                pid = open_file->pid;
2939        else
2940                pid = current->tgid;
2941
2942        do {
2943                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
2944                                                   &rsize, &credits);
2945                if (rc)
2946                        break;
2947
2948                cur_len = min_t(const size_t, len, rsize);
2949                npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2950
2951                /* allocate a readdata struct */
2952                rdata = cifs_readdata_alloc(npages,
2953                                            cifs_uncached_readv_complete);
2954                if (!rdata) {
2955                        add_credits_and_wake_if(server, credits, 0);
2956                        rc = -ENOMEM;
2957                        break;
2958                }
2959
2960                rc = cifs_read_allocate_pages(rdata, npages);
2961                if (rc)
2962                        goto error;
2963
2964                rdata->cfile = cifsFileInfo_get(open_file);
2965                rdata->nr_pages = npages;
2966                rdata->offset = offset;
2967                rdata->bytes = cur_len;
2968                rdata->pid = pid;
2969                rdata->pagesz = PAGE_SIZE;
2970                rdata->read_into_pages = cifs_uncached_read_into_pages;
2971                rdata->credits = credits;
2972
2973                if (!rdata->cfile->invalidHandle ||
2974                    !cifs_reopen_file(rdata->cfile, true))
2975                        rc = server->ops->async_readv(rdata);
2976error:
2977                if (rc) {
2978                        add_credits_and_wake_if(server, rdata->credits, 0);
2979                        kref_put(&rdata->refcount,
2980                                 cifs_uncached_readdata_release);
2981                        if (rc == -EAGAIN)
2982                                continue;
2983                        break;
2984                }
2985
2986                list_add_tail(&rdata->list, rdata_list);
2987                offset += cur_len;
2988                len -= cur_len;
2989        } while (len > 0);
2990
2991        return rc;
2992}
2993
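    /*
     * Uncached read path (also used by cifs_strict_readv() when the cache
     * cannot be trusted): fan out async reads over the requested range,
     * then reap the completions in offset order, copying each response
     * into the iov_iter and resending any request that came back -EAGAIN
     * after a reconnect.
     */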
2994ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
2995{
2996        struct file *file = iocb->ki_filp;
2997        ssize_t rc;
2998        size_t len;
2999        ssize_t total_read = 0;
3000        loff_t offset = iocb->ki_pos;
3001        struct cifs_sb_info *cifs_sb;
3002        struct cifs_tcon *tcon;
3003        struct cifsFileInfo *open_file;
3004        struct cifs_readdata *rdata, *tmp;
3005        struct list_head rdata_list;
3006
3007        len = iov_iter_count(to);
3008        if (!len)
3009                return 0;
3010
3011        INIT_LIST_HEAD(&rdata_list);
3012        cifs_sb = CIFS_FILE_SB(file);
3013        open_file = file->private_data;
3014        tcon = tlink_tcon(open_file->tlink);
3015
3016        if (!tcon->ses->server->ops->async_readv)
3017                return -ENOSYS;
3018
3019        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3020                cifs_dbg(FYI, "attempting read on write only file instance\n");
3021
3022        rc = cifs_send_async_read(offset, len, open_file, cifs_sb, &rdata_list);
3023
3024        /* if sending at least one read request succeeded, then reset rc */
3025        if (!list_empty(&rdata_list))
3026                rc = 0;
3027
3028        len = iov_iter_count(to);
3029        /* the loop below should proceed in the order of increasing offsets */
3030again:
3031        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
3032                if (!rc) {
3033                        /* FIXME: freezable sleep too? */
3034                        rc = wait_for_completion_killable(&rdata->done);
3035                        if (rc)
3036                                rc = -EINTR;
3037                        else if (rdata->result == -EAGAIN) {
3038                                /* resend call if it's a retryable error */
3039                                struct list_head tmp_list;
3040                                unsigned int got_bytes = rdata->got_bytes;
3041
3042                                list_del_init(&rdata->list);
3043                                INIT_LIST_HEAD(&tmp_list);
3044
3045                                /*
3046                                 * Got a part of data and then reconnect has
3047                                 * happened -- fill the buffer and continue
3048                                 * reading.
3049                                 */
3050                                if (got_bytes && got_bytes < rdata->bytes) {
3051                                        rc = cifs_readdata_to_iov(rdata, to);
3052                                        if (rc) {
3053                                                kref_put(&rdata->refcount,
3054                                                cifs_uncached_readdata_release);
3055                                                continue;
3056                                        }
3057                                }
3058
3059                                rc = cifs_send_async_read(
3060                                                rdata->offset + got_bytes,
3061                                                rdata->bytes - got_bytes,
3062                                                rdata->cfile, cifs_sb,
3063                                                &tmp_list);
3064
3065                                list_splice(&tmp_list, &rdata_list);
3066
3067                                kref_put(&rdata->refcount,
3068                                         cifs_uncached_readdata_release);
3069                                goto again;
3070                        } else if (rdata->result)
3071                                rc = rdata->result;
3072                        else
3073                                rc = cifs_readdata_to_iov(rdata, to);
3074
3075                        /* if there was a short read -- discard anything left */
3076                        if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3077                                rc = -ENODATA;
3078                }
3079                list_del_init(&rdata->list);
3080                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3081        }
3082
3083        total_read = len - iov_iter_count(to);
3084
3085        cifs_stats_bytes_read(tcon, total_read);
3086
3087        /* mask nodata case */
3088        if (rc == -ENODATA)
3089                rc = 0;
3090
3091        if (total_read) {
3092                iocb->ki_pos += total_read;
3093                return total_read;
3094        }
3095        return rc;
3096}
3097
3098ssize_t
3099cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3100{
3101        struct inode *inode = file_inode(iocb->ki_filp);
3102        struct cifsInodeInfo *cinode = CIFS_I(inode);
3103        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3104        struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3105                                                iocb->ki_filp->private_data;
3106        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3107        int rc = -EACCES;
3108
3109        /*
3110         * In strict cache mode we need to read from the server all the time
3111         * if we don't have a level II oplock because the server can delay
3112         * mtime changes, so we can't decide whether to invalidate the
3113         * inode's cache. Page reads can also fail if there are mandatory
3114         * locks on pages affected by this read but not on the region from
3115         * pos to pos+len-1.
3116         */
3117        if (!CIFS_CACHE_READ(cinode))
3118                return cifs_user_readv(iocb, to);
3119
3120        if (cap_unix(tcon->ses) &&
3121            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3122            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3123                return generic_file_read_iter(iocb, to);
3124
3125        /*
3126         * We need to hold the sem to be sure nobody modifies the lock list
3127         * with a brlock that prevents reading.
3128         */
3129        down_read(&cinode->lock_sem);
3130        if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3131                                     tcon->ses->server->vals->shared_lock_type,
3132                                     NULL, CIFS_READ_OP))
3133                rc = generic_file_read_iter(iocb, to);
3134        up_read(&cinode->lock_sem);
3135        return rc;
3136}
3137
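    /*
     * Synchronous read helper (used by cifs_readpage_worker()): pull up
     * to @read_size bytes at *@offset into @read_data in rsize-sized
     * chunks via the server's sync_read op, reopening an invalidated
     * handle and retrying on -EAGAIN, and advancing *@offset as data
     * arrives.
     */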
3138static ssize_t
3139cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3140{
3141        int rc = -EACCES;
3142        unsigned int bytes_read = 0;
3143        unsigned int total_read;
3144        unsigned int current_read_size;
3145        unsigned int rsize;
3146        struct cifs_sb_info *cifs_sb;
3147        struct cifs_tcon *tcon;
3148        struct TCP_Server_Info *server;
3149        unsigned int xid;
3150        char *cur_offset;
3151        struct cifsFileInfo *open_file;
3152        struct cifs_io_parms io_parms;
3153        int buf_type = CIFS_NO_BUFFER;
3154        __u32 pid;
3155
3156        xid = get_xid();
3157        cifs_sb = CIFS_FILE_SB(file);
3158
3159        /* FIXME: set up handlers for larger reads and/or convert to async */
3160        rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3161
3162        if (file->private_data == NULL) {
3163                rc = -EBADF;
3164                free_xid(xid);
3165                return rc;
3166        }
3167        open_file = file->private_data;
3168        tcon = tlink_tcon(open_file->tlink);
3169        server = tcon->ses->server;
3170
3171        if (!server->ops->sync_read) {
3172                free_xid(xid);
3173                return -ENOSYS;
3174        }
3175
3176        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3177                pid = open_file->pid;
3178        else
3179                pid = current->tgid;
3180
3181        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3182                cifs_dbg(FYI, "attempting read on write only file instance\n");
3183
3184        for (total_read = 0, cur_offset = read_data; read_size > total_read;
3185             total_read += bytes_read, cur_offset += bytes_read) {
3186                do {
3187                        current_read_size = min_t(uint, read_size - total_read,
3188                                                  rsize);
3189                        /*
3190                         * For Windows ME and 9x we do not want to request
3191                         * more than it negotiated, since it would then
3192                         * refuse the read.
3193                         */
3194                        if ((tcon->ses) && !(tcon->ses->capabilities &
3195                                tcon->ses->server->vals->cap_large_files)) {
3196                                current_read_size = min_t(uint,
3197                                        current_read_size, CIFSMaxBufSize);
3198                        }
3199                        if (open_file->invalidHandle) {
3200                                rc = cifs_reopen_file(open_file, true);
3201                                if (rc != 0)
3202                                        break;
3203                        }
3204                        io_parms.pid = pid;
3205                        io_parms.tcon = tcon;
3206                        io_parms.offset = *offset;
3207                        io_parms.length = current_read_size;
3208                        rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3209                                                    &bytes_read, &cur_offset,
3210                                                    &buf_type);
3211                } while (rc == -EAGAIN);
3212
3213                if (rc || (bytes_read == 0)) {
3214                        if (total_read) {
3215                                break;
3216                        } else {
3217                                free_xid(xid);
3218                                return rc;
3219                        }
3220                } else {
3221                        cifs_stats_bytes_read(tcon, total_read);
3222                        *offset += bytes_read;
3223                }
3224        }
3225        free_xid(xid);
3226        return total_read;
3227}
3228
3229/*
3230 * If the page is mmap'ed into a process' page tables, then we need to make
3231 * sure that it doesn't change while being written back.
3232 */
3233static int
3234cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3235{
3236        struct page *page = vmf->page;
3237
3238        lock_page(page);
3239        return VM_FAULT_LOCKED;
3240}
3241
3242static const struct vm_operations_struct cifs_file_vm_ops = {
3243        .fault = filemap_fault,
3244        .map_pages = filemap_map_pages,
3245        .page_mkwrite = cifs_page_mkwrite,
3246};
3247
3248int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3249{
3250        int rc = 0, xid;
3251        struct inode *inode = file_inode(file);
3252
3253        xid = get_xid();
3254
3255        if (!CIFS_CACHE_READ(CIFS_I(inode)))
3256                rc = cifs_zap_mapping(inode);
3257
3258        /* do not return early on error here, otherwise the xid taken
3259           above would leak - free it on every exit path instead */
3260        if (!rc)
3261                rc = generic_file_mmap(file, vma);
3262        if (!rc)
3263                vma->vm_ops = &cifs_file_vm_ops;
3264        free_xid(xid);
3265        return rc;
3266}
3267
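    /*
     * Non-strict mmap: revalidate the file's cache against the server
     * before falling through to generic_file_mmap().
     */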
3268int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3269{
3270        int rc, xid;
3271
3272        xid = get_xid();
3273        rc = cifs_revalidate_file(file);
3274        if (rc) {
3275                cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3276                         rc);
3277                free_xid(xid);
3278                return rc;
3279        }
3280        rc = generic_file_mmap(file, vma);
3281        if (rc == 0)
3282                vma->vm_ops = &cifs_file_vm_ops;
3283        free_xid(xid);
3284        return rc;
3285}
3286
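    /*
     * Completion work for cifs_readpages(): put the pages on the LRU,
     * mark those that were actually filled up to date, mirror them into
     * fscache, and drop the page cache and readdata references.
     */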
3287static void
3288cifs_readv_complete(struct work_struct *work)
3289{
3290        unsigned int i, got_bytes;
3291        struct cifs_readdata *rdata = container_of(work,
3292                                                struct cifs_readdata, work);
3293
3294        got_bytes = rdata->got_bytes;
3295        for (i = 0; i < rdata->nr_pages; i++) {
3296                struct page *page = rdata->pages[i];
3297
3298                lru_cache_add_file(page);
3299
3300                if (rdata->result == 0 ||
3301                    (rdata->result == -EAGAIN && got_bytes)) {
3302                        flush_dcache_page(page);
3303                        SetPageUptodate(page);
3304                }
3305
3306                unlock_page(page);
3307
3308                if (rdata->result == 0 ||
3309                    (rdata->result == -EAGAIN && got_bytes))
3310                        cifs_readpage_to_fscache(rdata->mapping->host, page);
3311
3312                got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
3313
3314                page_cache_release(page);
3315                rdata->pages[i] = NULL;
3316        }
3317        kref_put(&rdata->refcount, cifs_readdata_release);
3318}
3319
3320static int
3321cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3322                        struct cifs_readdata *rdata, unsigned int len)
3323{
3324        int result = 0;
3325        unsigned int i;
3326        u64 eof;
3327        pgoff_t eof_index;
3328        unsigned int nr_pages = rdata->nr_pages;
3329        struct kvec iov;
3330
3331        /* determine the eof that the server (probably) has */
3332        eof = CIFS_I(rdata->mapping->host)->server_eof;
3333        eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3334        cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3335
3336        rdata->got_bytes = 0;
3337        rdata->tailsz = PAGE_CACHE_SIZE;
3338        for (i = 0; i < nr_pages; i++) {
3339                struct page *page = rdata->pages[i];
3340
3341                if (len >= PAGE_CACHE_SIZE) {
3342                        /* enough data to fill the page */
3343                        iov.iov_base = kmap(page);
3344                        iov.iov_len = PAGE_CACHE_SIZE;
3345                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3346                                 i, page->index, iov.iov_base, iov.iov_len);
3347                        len -= PAGE_CACHE_SIZE;
3348                } else if (len > 0) {
3349                        /* enough for partial page, fill and zero the rest */
3350                        iov.iov_base = kmap(page);
3351                        iov.iov_len = len;
3352                        cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3353                                 i, page->index, iov.iov_base, iov.iov_len);
3354                        memset(iov.iov_base + len,
3355                                '\0', PAGE_CACHE_SIZE - len);
3356                        rdata->tailsz = len;
3357                        len = 0;
3358                } else if (page->index > eof_index) {
3359                        /*
3360                         * The VFS will not try to do readahead past the
3361                         * i_size, but it's possible that we have outstanding
3362                         * writes with gaps in the middle and the i_size hasn't
3363                         * caught up yet. Populate those with zeroed out pages
3364                         * to prevent the VFS from repeatedly attempting to
3365                         * fill them until the writes are flushed.
3366                         */
3367                        zero_user(page, 0, PAGE_CACHE_SIZE);
3368                        lru_cache_add_file(page);
3369                        flush_dcache_page(page);
3370                        SetPageUptodate(page);
3371                        unlock_page(page);
3372                        page_cache_release(page);
3373                        rdata->pages[i] = NULL;
3374                        rdata->nr_pages--;
3375                        continue;
3376                } else {
3377                        /* no need to hold page hostage */
3378                        lru_cache_add_file(page);
3379                        unlock_page(page);
3380                        page_cache_release(page);
3381                        rdata->pages[i] = NULL;
3382                        rdata->nr_pages--;
3383                        continue;
3384                }
3385
3386                result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3387                kunmap(page);
3388                if (result < 0)
3389                        break;
3390
3391                rdata->got_bytes += result;
3392        }
3393
3394        return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3395                                                rdata->got_bytes : result;
3396}
3397
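    /*
     * Peel pages off the tail of @page_list (which is kept in descending
     * index order) onto @tmplist while their indexes remain contiguous
     * and the running total fits within @rsize, adding each one to the
     * page cache locked. Returns the resulting @offset, @bytes and
     * @nr_pages for the read request being built.
     */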
3398static int
3399readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3400                    unsigned int rsize, struct list_head *tmplist,
3401                    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3402{
3403        struct page *page, *tpage;
3404        unsigned int expected_index;
3405        int rc;
3406
3407        INIT_LIST_HEAD(tmplist);
3408
3409        page = list_entry(page_list->prev, struct page, lru);
3410
3411        /*
3412         * Lock the page and put it in the cache. Since no one else
3413         * should have access to this page, we're safe to simply set
3414         * PG_locked without checking it first.
3415         */
3416        __set_page_locked(page);
3417        rc = add_to_page_cache_locked(page, mapping,
3418                                      page->index, GFP_KERNEL);
3419
3420        /* give up if we can't stick it in the cache */
3421        if (rc) {
3422                __clear_page_locked(page);
3423                return rc;
3424        }
3425
3426        /* move first page to the tmplist */
3427        *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3428        *bytes = PAGE_CACHE_SIZE;
3429        *nr_pages = 1;
3430        list_move_tail(&page->lru, tmplist);
3431
3432        /* now try and add more pages onto the request */
3433        expected_index = page->index + 1;
3434        list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3435                /* discontinuity? */
3436                if (page->index != expected_index)
3437                        break;
3438
3439                /* would this page push the read over the rsize? */
3440                if (*bytes + PAGE_CACHE_SIZE > rsize)
3441                        break;
3442
3443                __set_page_locked(page);
3444                if (add_to_page_cache_locked(page, mapping, page->index,
3445                                                                GFP_KERNEL)) {
3446                        __clear_page_locked(page);
3447                        break;
3448                }
3449                list_move_tail(&page->lru, tmplist);
3450                (*bytes) += PAGE_CACHE_SIZE;
3451                expected_index++;
3452                (*nr_pages)++;
3453        }
3454        return rc;
3455}
3456
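    /*
     * .readpages: try fscache first, then batch the remaining readahead
     * pages into rsize-sized async read requests, as described in the
     * block comment above the main loop below.
     */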
3457static int cifs_readpages(struct file *file, struct address_space *mapping,
3458        struct list_head *page_list, unsigned num_pages)
3459{
3460        int rc;
3461        struct list_head tmplist;
3462        struct cifsFileInfo *open_file = file->private_data;
3463        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3464        struct TCP_Server_Info *server;
3465        pid_t pid;
3466
3467        /*
3468         * Reads as many pages as possible from fscache. Returns -ENOBUFS
3469         * immediately if the cookie is negative.
3470         *
3471         * After this point, every page in the list might have PG_fscache set,
3472         * so we will need to clean that up off of every page we don't use.
3473         */
3474        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3475                                         &num_pages);
3476        if (rc == 0)
3477                return rc;
3478
3479        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3480                pid = open_file->pid;
3481        else
3482                pid = current->tgid;
3483
3484        rc = 0;
3485        server = tlink_tcon(open_file->tlink)->ses->server;
3486
3487        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3488                 __func__, file, mapping, num_pages);
3489
3490        /*
3491         * Start with the page at end of list and move it to private
3492         * list. Do the same with any following pages until we hit
3493         * the rsize limit, hit an index discontinuity, or run out of
3494         * pages. Issue the async read and then start the loop again
3495         * until the list is empty.
3496         *
3497         * Note that list order is important. The page_list is in
3498         * the order of declining indexes. When we put the pages in
3499         * the rdata->pages, then we want them in increasing order.
3500         */
3501        while (!list_empty(page_list)) {
3502                unsigned int i, nr_pages, bytes, rsize;
3503                loff_t offset;
3504                struct page *page, *tpage;
3505                struct cifs_readdata *rdata;
3506                unsigned credits;
3507
3508                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3509                                                   &rsize, &credits);
3510                if (rc)
3511                        break;
3512
3513                /*
3514                 * Give up immediately if rsize is too small to read an entire
3515                 * page. The VFS will fall back to readpage. We should never
3516                 * reach this point however since we set ra_pages to 0 when the
3517                 * rsize is smaller than a cache page.
3518                 */
3519                if (unlikely(rsize < PAGE_CACHE_SIZE)) {
3520                        add_credits_and_wake_if(server, credits, 0);
3521                        return 0;
3522                }
3523
3524                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3525                                         &nr_pages, &offset, &bytes);
3526                if (rc) {
3527                        add_credits_and_wake_if(server, credits, 0);
3528                        break;
3529                }
3530
3531                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3532                if (!rdata) {
3533                        /* best to give up if we're out of mem */
3534                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3535                                list_del(&page->lru);
3536                                lru_cache_add_file(page);
3537                                unlock_page(page);
3538                                page_cache_release(page);
3539                        }
3540                        rc = -ENOMEM;
3541                        add_credits_and_wake_if(server, credits, 0);
3542                        break;
3543                }
3544
3545                rdata->cfile = cifsFileInfo_get(open_file);
3546                rdata->mapping = mapping;
3547                rdata->offset = offset;
3548                rdata->bytes = bytes;
3549                rdata->pid = pid;
3550                rdata->pagesz = PAGE_CACHE_SIZE;
3551                rdata->read_into_pages = cifs_readpages_read_into_pages;
3552                rdata->credits = credits;
3553
3554                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3555                        list_del(&page->lru);
3556                        rdata->pages[rdata->nr_pages++] = page;
3557                }
3558
3559                if (!rdata->cfile->invalidHandle ||
3560                    !cifs_reopen_file(rdata->cfile, true))
3561                        rc = server->ops->async_readv(rdata);
3562                if (rc) {
3563                        add_credits_and_wake_if(server, rdata->credits, 0);
3564                        for (i = 0; i < rdata->nr_pages; i++) {
3565                                page = rdata->pages[i];
3566                                lru_cache_add_file(page);
3567                                unlock_page(page);
3568                                page_cache_release(page);
3569                        }
3570                        /* Fall back to readpage in error/reconnect cases */
3571                        kref_put(&rdata->refcount, cifs_readdata_release);
3572                        break;
3573                }
3574
3575                kref_put(&rdata->refcount, cifs_readdata_release);
3576        }
3577
3578        /* Any pages that have been shown to fscache but didn't get added to
3579         * the pagecache must be uncached before they get returned to the
3580         * allocator.
3581         */
3582        cifs_fscache_readpages_cancel(mapping->host, page_list);
3583        return rc;
3584}
3585
3586/*
3587 * cifs_readpage_worker must be called with the page pinned
3588 */
3589static int cifs_readpage_worker(struct file *file, struct page *page,
3590        loff_t *poffset)
3591{
3592        char *read_data;
3593        int rc;
3594
3595        /* Is the page cached? */
3596        rc = cifs_readpage_from_fscache(file_inode(file), page);
3597        if (rc == 0)
3598                goto read_complete;
3599
3600        read_data = kmap(page);
3601        /* for reads over a certain size we could initiate async read-ahead */
3602
3603        rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3604
3605        if (rc < 0)
3606                goto io_error;
3607        else
3608                cifs_dbg(FYI, "Bytes read %d\n", rc);
3609
3610        file_inode(file)->i_atime =
3611                current_fs_time(file_inode(file)->i_sb);
3612
3613        if (PAGE_CACHE_SIZE > rc)
3614                memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3615
3616        flush_dcache_page(page);
3617        SetPageUptodate(page);
3618
3619        /* send this page to the cache */
3620        cifs_readpage_to_fscache(file_inode(file), page);
3621
3622        rc = 0;
3623
3624io_error:
3625        kunmap(page);
3626        unlock_page(page);
3627
3628read_complete:
3629        return rc;
3630}
3631
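    /*
     * .readpage: fill a single page cache page, from fscache when
     * possible and otherwise with a synchronous read from the server.
     */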
3632static int cifs_readpage(struct file *file, struct page *page)
3633{
3634        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3635        int rc = -EACCES;
3636        unsigned int xid;
3637
3638        xid = get_xid();
3639
3640        if (file->private_data == NULL) {
3641                rc = -EBADF;
3642                free_xid(xid);
3643                return rc;
3644        }
3645
3646        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3647                 page, (int)offset, (int)offset);
3648
3649        rc = cifs_readpage_worker(file, page, &offset);
3650
3651        free_xid(xid);
3652        return rc;
3653}
3654
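    /*
     * Return 1 if any open file instance on this inode was opened with
     * write access; walks openFileList under cifs_file_list_lock.
     */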
3655static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3656{
3657        struct cifsFileInfo *open_file;
3658
3659        spin_lock(&cifs_file_list_lock);
3660        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3661                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3662                        spin_unlock(&cifs_file_list_lock);
3663                        return 1;
3664                }
3665        }
3666        spin_unlock(&cifs_file_list_lock);
3667        return 0;
3668}
3669
3670/* We do not want to update the file size from the server for inodes
3671   open for write - to avoid races with writepage extending the file.
3672   In the future we could consider refreshing the inode only on
3673   increases in the file size, but this is tricky to do without
3674   racing with writebehind page caching in the current Linux kernel
3675   design. */
3676bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3677{
3678        if (!cifsInode)
3679                return true;
3680
3681        if (is_inode_writable(cifsInode)) {
3682                /* This inode is open for write at least once */
3683                struct cifs_sb_info *cifs_sb;
3684
3685                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3686                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3687                        /* since there is no page cache to corrupt on
3688                           directio, we can change the size safely */
3689                        return true;
3690                }
3691
3692                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3693                        return true;
3694
3695                return false;
3696        } else
3697                return true;
3698}
3699
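    /*
     * .write_begin: grab the target page and decide whether it must be
     * read in from the server first, can be zero-filled, or can be left
     * not up to date (in which case cifs_write_end does a sync write).
     */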
3700static int cifs_write_begin(struct file *file, struct address_space *mapping,
3701                        loff_t pos, unsigned len, unsigned flags,
3702                        struct page **pagep, void **fsdata)
3703{
3704        int oncethru = 0;
3705        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3706        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3707        loff_t page_start = pos & PAGE_MASK;
3708        loff_t i_size;
3709        struct page *page;
3710        int rc = 0;
3711
3712        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3713
3714start:
3715        page = grab_cache_page_write_begin(mapping, index, flags);
3716        if (!page) {
3717                rc = -ENOMEM;
3718                goto out;
3719        }
3720
3721        if (PageUptodate(page))
3722                goto out;
3723
3724        /*
3725         * If we write a full page it will be up to date, no need to read from
3726         * the server. If the write is short, we'll end up doing a sync write
3727         * instead.
3728         */
3729        if (len == PAGE_CACHE_SIZE)
3730                goto out;
3731
3732        /*
3733         * optimize away the read when we have an oplock, and we're not
3734         * expecting to use any of the data we'd be reading in. That
3735         * is, when the page lies beyond the EOF, or straddles the EOF
3736         * and the write will cover all of the existing data.
3737         */
3738        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3739                i_size = i_size_read(mapping->host);
3740                if (page_start >= i_size ||
3741                    (offset == 0 && (pos + len) >= i_size)) {
3742                        zero_user_segments(page, 0, offset,
3743                                           offset + len,
3744                                           PAGE_CACHE_SIZE);
3745                        /*
3746                         * PageChecked means that the parts of the page
3747                         * to which we're not writing are considered up
3748                         * to date. Once the data is copied to the
3749                         * page, it can be set uptodate.
3750                         */
3751                        SetPageChecked(page);
3752                        goto out;
3753                }
3754        }
3755
3756        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3757                /*
3758                 * might as well read a page, it is fast enough. If we get
3759                 * an error, we don't need to return it. cifs_write_end will
3760                 * do a sync write instead since PG_uptodate isn't set.
3761                 */
3762                cifs_readpage_worker(file, page, &page_start);
3763                page_cache_release(page);
3764                oncethru = 1;
3765                goto start;
3766        } else {
3767                /* We could try using another file handle if there is one,
3768                   but how would we lock it to prevent a close of that
3769                   handle racing with this read? In any case this page
3770                   will be written out by write_end, so this is fine. */
3771        }
3772out:
3773        *pagep = page;
3774        return rc;
3775}
3776
3777static int cifs_release_page(struct page *page, gfp_t gfp)
3778{
3779        if (PagePrivate(page))
3780                return 0;
3781
3782        return cifs_fscache_release_page(page, gfp);
3783}
3784
3785static void cifs_invalidate_page(struct page *page, unsigned int offset,
3786                                 unsigned int length)
3787{
3788        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3789
3790        if (offset == 0 && length == PAGE_CACHE_SIZE)
3791                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3792}
3793
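    /*
     * .launder_page: synchronously write back a dirty page before it is
     * invalidated, then drop any fscache copy of it.
     */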
3794static int cifs_launder_page(struct page *page)
3795{
3796        int rc = 0;
3797        loff_t range_start = page_offset(page);
3798        loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3799        struct writeback_control wbc = {
3800                .sync_mode = WB_SYNC_ALL,
3801                .nr_to_write = 0,
3802                .range_start = range_start,
3803                .range_end = range_end,
3804        };
3805
3806        cifs_dbg(FYI, "Launder page: %p\n", page);
3807
3808        if (clear_page_dirty_for_io(page))
3809                rc = cifs_writepage_locked(page, &wbc);
3810
3811        cifs_fscache_invalidate_page(page, page->mapping->host);
3812        return rc;
3813}
3814
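    /*
     * Work item run when the server breaks our oplock: wait out pending
     * writers, downgrade the cached oplock state, flush (and, if we lose
     * read caching, zap) the page cache, re-push byte-range locks, and
     * acknowledge the break to the server unless it was cancelled.
     */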
3815void cifs_oplock_break(struct work_struct *work)
3816{
3817        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3818                                                  oplock_break);
3819        struct inode *inode = cfile->dentry->d_inode;
3820        struct cifsInodeInfo *cinode = CIFS_I(inode);
3821        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3822        struct TCP_Server_Info *server = tcon->ses->server;
3823        int rc = 0;
3824
3825        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
3826                        TASK_UNINTERRUPTIBLE);
3827
3828        server->ops->downgrade_oplock(server, cinode,
3829                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
3830
3831        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
3832                                                cifs_has_mand_locks(cinode)) {
3833                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3834                         inode);
3835                cinode->oplock = 0;
3836        }
3837
3838        if (inode && S_ISREG(inode->i_mode)) {
3839                if (CIFS_CACHE_READ(cinode))
3840                        break_lease(inode, O_RDONLY);
3841                else
3842                        break_lease(inode, O_WRONLY);
3843                rc = filemap_fdatawrite(inode->i_mapping);
3844                if (!CIFS_CACHE_READ(cinode)) {
3845                        rc = filemap_fdatawait(inode->i_mapping);
3846                        mapping_set_error(inode->i_mapping, rc);
3847                        cifs_zap_mapping(inode);
3848                }
3849                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3850        }
3851
3852        rc = cifs_push_locks(cfile);
3853        if (rc)
3854                cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3855
3856        /*
3857         * Releasing a stale oplock after a recent reconnect of the SMB
3858         * session using a now incorrect file handle is not a data integrity
3859         * issue, but do not bother sending an oplock release if the session
3860         * is still disconnected - the server has already released the oplock.
3861         */
3862        if (!cfile->oplock_break_cancelled) {
3863                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3864                                                             cinode);
3865                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3866        }
3867        cifs_done_oplock_break(cinode);
3868}
3869
3870/*
3871 * The presence of cifs_direct_io() in the address space ops vector
3872 * allows open() with the O_DIRECT flag, which would have failed otherwise.
3873 *
3874 * In the non-cached mode (mount with cache=none), we shunt off direct
3875 * read and write requests, so this method should never be called.
3876 *
3877 * Direct IO is not yet supported in the cached mode.
3878 */
3879static ssize_t
3880cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
3881               loff_t pos)
3882{
3883        /*
3884         * FIXME
3885         * Eventually need to support direct IO for non forcedirectio mounts
3886         */
3887        return -EINVAL;
3888}
3889
3890
3891const struct address_space_operations cifs_addr_ops = {
3892        .readpage = cifs_readpage,
3893        .readpages = cifs_readpages,
3894        .writepage = cifs_writepage,
3895        .writepages = cifs_writepages,
3896        .write_begin = cifs_write_begin,
3897        .write_end = cifs_write_end,
3898        .set_page_dirty = __set_page_dirty_nobuffers,
3899        .releasepage = cifs_release_page,
3900        .direct_IO = cifs_direct_io,
3901        .invalidatepage = cifs_invalidate_page,
3902        .launder_page = cifs_launder_page,
3903};
3904
3905/*
3906 * cifs_readpages requires the server to support a buffer large enough to
3907 * contain the header plus one complete page of data.  Otherwise, we need
3908 * to leave cifs_readpages out of the address space operations.
3909 */
3910const struct address_space_operations cifs_addr_ops_smallbuf = {
3911        .readpage = cifs_readpage,
3912        .writepage = cifs_writepage,
3913        .writepages = cifs_writepages,
3914        .write_begin = cifs_write_begin,
3915        .write_end = cifs_write_end,
3916        .set_page_dirty = __set_page_dirty_nobuffers,
3917        .releasepage = cifs_release_page,
3918        .invalidatepage = cifs_invalidate_page,
3919        .launder_page = cifs_launder_page,
3920};
3921