linux/drivers/staging/lustre/lustre/llite/llite_internal.h
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 */

#ifndef LLITE_INTERNAL_H
#define LLITE_INTERNAL_H
#include "../include/lustre_debug.h"
#include "../include/lustre_ver.h"
#include "../include/lustre_disk.h"     /* for s2sbi */
#include "../include/lustre_eacl.h"

/* for struct cl_lock_descr and struct cl_io */
#include "../include/cl_object.h"
#include "../include/lclient.h"
#include "../include/lustre_mdc.h"
#include "../include/lustre_intent.h"
#include <linux/compat.h>
#include <linux/posix_acl_xattr.h>

#ifndef FMODE_EXEC
#define FMODE_EXEC 0
#endif

#ifndef VM_FAULT_RETRY
#define VM_FAULT_RETRY 0
#endif

/* Kernel 3.1 kills LOOKUP_CONTINUE; LOOKUP_PARENT is equivalent to it.
 * See kernel commit 49084c3bb2055c401f3493c13edae14d49128ca0 */
#ifndef LOOKUP_CONTINUE
#define LOOKUP_CONTINUE LOOKUP_PARENT
#endif

/** Only used on client-side for indicating the tail of dir hash/offset. */
#define LL_DIR_END_OFF    0x7fffffffffffffffULL
#define LL_DIR_END_OFF_32BIT    0x7fffffffUL

#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
#define LUSTRE_FPRIVATE(file) ((file)->private_data)

struct ll_dentry_data {
        struct lookup_intent            *lld_it;
        unsigned int                    lld_sa_generation;
        unsigned int                    lld_invalid:1;
        struct rcu_head                 lld_rcu_head;
};

#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))

#define LLI_INODE_MAGIC          0x111d0de5
#define LLI_INODE_DEAD            0xdeadd00d

/* remote client permission cache */
#define REMOTE_PERM_HASHSIZE 16

struct ll_getname_data {
        struct dir_context ctx;
        char        *lgd_name;      /* points to a buffer with NAME_MAX+1 size */
        struct lu_fid    lgd_fid;       /* target fid we are looking for */
        int           lgd_found;     /* inode matched? */
};

/* llite setxid/access permission for user on remote client */
struct ll_remote_perm {
        struct hlist_node       lrp_list;
        uid_t              lrp_uid;
        gid_t              lrp_gid;
        uid_t              lrp_fsuid;
        gid_t              lrp_fsgid;
        int                  lrp_access_perm; /* MAY_READ/WRITE/EXEC, this
                                                    is access permission with
                                                    lrp_fsuid/lrp_fsgid. */
};

enum lli_flags {
        /* MDS has an authority for the Size-on-MDS attributes. */
        LLIF_MDS_SIZE_LOCK      = (1 << 0),
        /* Epoch close is postponed. */
        LLIF_EPOCH_PENDING      = (1 << 1),
        /* DONE WRITING is allowed. */
        LLIF_DONE_WRITING       = (1 << 2),
        /* Size-on-MDS attributes are changed. An attribute update needs to
         * be sent to the MDS. */
        LLIF_SOM_DIRTY    = (1 << 3),
        /* File data is modified. */
        LLIF_DATA_MODIFIED      = (1 << 4),
        /* File is being restored */
        LLIF_FILE_RESTORING     = (1 << 5),
        /* Xattr cache is attached to the file */
        LLIF_XATTR_CACHE        = (1 << 6),
};

struct ll_inode_info {
        __u32                           lli_inode_magic;
        __u32                           lli_flags;
        __u64                           lli_ioepoch;

        spinlock_t                      lli_lock;
        struct posix_acl                *lli_posix_acl;

        struct hlist_head               *lli_remote_perms;
        struct mutex                            lli_rmtperm_mutex;

        /* identifying fields for both metadata and data stacks. */
        struct lu_fid              lli_fid;
        /* Parent fid for accessing default stripe data on parent directory
         * for allocating OST objects after a mknod() and later open-by-FID. */
        struct lu_fid              lli_pfid;

        struct list_head                      lli_close_list;
        struct list_head                      lli_oss_capas;
        /* open count currently used by capability only; indicates whether
         * the capability needs renewal */
        atomic_t                    lli_open_count;
        struct obd_capa         *lli_mds_capa;
        unsigned long                 lli_rmtperm_time;

        /* handle is to be sent to MDS later on done_writing and setattr.
         * Open handle data are needed for the recovery to reconstruct
         * the inode state on the MDS. XXX: recovery is not ready yet. */
        struct obd_client_handle       *lli_pending_och;

        /* We need all three because every inode may be opened in different
         * modes */
        struct obd_client_handle       *lli_mds_read_och;
        struct obd_client_handle       *lli_mds_write_och;
        struct obd_client_handle       *lli_mds_exec_och;
        __u64                      lli_open_fd_read_count;
        __u64                      lli_open_fd_write_count;
        __u64                      lli_open_fd_exec_count;
        /* Protects access to och pointers and their usage counters */
        struct mutex                    lli_och_mutex;

        struct inode                    lli_vfs_inode;

        /* the most recent timestamps obtained from mds */
        struct ost_lvb                  lli_lvb;
        spinlock_t                      lli_agl_lock;

        /* Try to keep the d:: and f:: members aligned. Before using these
         * members, make clear whether the inode is a directory or not. */
        union {
                /* for directory */
                struct {
                        /* serialize normal readdir and statahead-readdir. */
                        struct mutex                    d_readdir_mutex;

                        /* metadata statahead */
                        /* since parent and child threads can share the same
                         * @file struct, "opendir_key" is the token used at
                         * dir close to decide, when the parent exits before
                         * the child, who should clean up the dir readahead. */
                        void                       *d_opendir_key;
                        struct ll_statahead_info       *d_sai;
                        /* protect statahead stuff. */
                        spinlock_t                      d_sa_lock;
 189                        /* "opendir_pid" is the token when lookup/revalid
 190                         * -- I am the owner of dir statahead. */
                        pid_t                      d_opendir_pid;
                } d;

#define lli_readdir_mutex       u.d.d_readdir_mutex
#define lli_opendir_key  u.d.d_opendir_key
#define lli_sai          u.d.d_sai
#define lli_sa_lock          u.d.d_sa_lock
#define lli_opendir_pid  u.d.d_opendir_pid

                /* for non-directory */
                struct {
                        struct mutex                    f_size_mutex;
                        char                            *f_symlink_name;
                        __u64                           f_maxbytes;
                        /*
                         * struct rw_semaphore {
                         *    signed long       count;     // align d.d_def_acl
                         *    spinlock_t        wait_lock; // align d.d_sa_lock
                         *    struct list_head wait_list;
                         * }
                         */
                        struct rw_semaphore             f_trunc_sem;
                        struct mutex                    f_write_mutex;

                        struct rw_semaphore             f_glimpse_sem;
                        unsigned long                   f_glimpse_time;
                        struct list_head                        f_agl_list;
                        __u64                           f_agl_index;

                        /* for writepage() only to communicate to fsync */
                        int                             f_async_rc;

                        /*
                         * Whenever a process tries to read/write the file, its
                         * jobid is saved here and packed into the write RPC
                         * when it is flushed later.
                         *
                         * So the read/write statistics per jobid will not be
                         * accurate if the file is shared by different jobs.
                         */
                        char                 f_jobid[JOBSTATS_JOBID_SIZE];
                } f;

#define lli_size_mutex          u.f.f_size_mutex
#define lli_symlink_name        u.f.f_symlink_name
#define lli_maxbytes        u.f.f_maxbytes
#define lli_trunc_sem      u.f.f_trunc_sem
#define lli_write_mutex  u.f.f_write_mutex
#define lli_glimpse_sem         u.f.f_glimpse_sem
#define lli_glimpse_time        u.f.f_glimpse_time
#define lli_agl_list            u.f.f_agl_list
#define lli_agl_index           u.f.f_agl_index
#define lli_async_rc            u.f.f_async_rc
#define lli_jobid               u.f.f_jobid

        } u;

        /* XXX: The following members are frequently used. Although some of
         *      them matter only for non-directory objects, checking whether
         *      the object is a directory before each use would waste time.
         *      On the other hand, since sizeof(f) > sizeof(d) at present,
         *      moving them into u.f would not reduce the size of
         *      "ll_inode_info" anyway. So keep them outside the union.
         *
         *      In the future, if more members are added only for directory,
         *      some of the following members can be moved into u.f.
         */
        bool                        lli_has_smd;
        struct cl_object               *lli_clob;

        /* mutex to request the layout lock exclusively. */
        struct mutex                    lli_layout_mutex;
        /* Layout version, protected by lli_layout_lock */
        __u32                           lli_layout_gen;
        spinlock_t                      lli_layout_lock;

        struct rw_semaphore             lli_xattrs_list_rwsem;
        struct mutex                    lli_xattrs_enq_lock;
        struct list_head                lli_xattrs;/* ll_xattr_entry->xe_list */
};

static inline __u32 ll_layout_version_get(struct ll_inode_info *lli)
{
        __u32 gen;

        spin_lock(&lli->lli_layout_lock);
        gen = lli->lli_layout_gen;
        spin_unlock(&lli->lli_layout_lock);

        return gen;
}

static inline void ll_layout_version_set(struct ll_inode_info *lli, __u32 gen)
{
        spin_lock(&lli->lli_layout_lock);
        lli->lli_layout_gen = gen;
        spin_unlock(&lli->lli_layout_lock);
}
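
/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * header): a reader can snapshot the layout generation with the helper
 * above and later detect a concurrent layout change:
 *
 *      __u32 gen = ll_layout_version_get(lli);
 *      ... do work that depends on the current layout ...
 *      if (ll_layout_version_get(lli) != gen)
 *              ... the layout changed underneath us, retry ...
 */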

int ll_xattr_cache_destroy(struct inode *inode);

int ll_xattr_cache_get(struct inode *inode,
                        const char *name,
                        char *buffer,
                        size_t size,
                        __u64 valid);

/*
 * Locking to guarantee consistency of non-atomic updates to long long i_size,
 * consistency between file size and KMS.
 *
 * Implemented by ->lli_size_mutex and ->lsm_lock, nested in that order.
 */

void ll_inode_size_lock(struct inode *inode);
void ll_inode_size_unlock(struct inode *inode);

/* FIXME: replace the name of this with LL_I to conform to kernel stuff */
/* static inline struct ll_inode_info *LL_I(struct inode *inode) */
static inline struct ll_inode_info *ll_i2info(struct inode *inode)
{
        return container_of(inode, struct ll_inode_info, lli_vfs_inode);
}
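
/*
 * Illustrative note (not part of the original header): since the VFS
 * inode is embedded in ll_inode_info as lli_vfs_inode, ll_i2info() is a
 * plain container_of() and the mapping is its own inverse:
 *
 *      struct ll_inode_info *lli = ll_i2info(inode);
 *      LASSERT(&lli->lli_vfs_inode == inode);
 */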

/* default to about 40 MB of readahead on a given system. That much tied
 * up in 512K readahead requests serviced concurrently at 40ms each is
 * about 1GB/s. */
#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT))

/* default to read-ahead full files smaller than 2MB on the second read */
#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT))

enum ra_stat {
        RA_STAT_HIT = 0,
        RA_STAT_MISS,
        RA_STAT_DISTANT_READPAGE,
        RA_STAT_MISS_IN_WINDOW,
        RA_STAT_FAILED_GRAB_PAGE,
        RA_STAT_FAILED_MATCH,
        RA_STAT_DISCARDED,
        RA_STAT_ZERO_LEN,
        RA_STAT_ZERO_WINDOW,
        RA_STAT_EOF,
        RA_STAT_MAX_IN_FLIGHT,
        RA_STAT_WRONG_GRAB_PAGE,
        _NR_RA_STAT,
};

struct ll_ra_info {
        atomic_t              ra_cur_pages;
        unsigned long        ra_max_pages;
        unsigned long        ra_max_pages_per_file;
        unsigned long        ra_max_read_ahead_whole_pages;
};

/* ra_io_arg is filled at the beginning of ll_readahead() under ras_lock;
 * the following ll_read_ahead_pages() then reads RA pages according to
 * this arg. All the items in this structure are counted in page index
 * units.
 */
struct ra_io_arg {
        unsigned long ria_start;  /* start offset of read-ahead */
        unsigned long ria_end;    /* end offset of read-ahead */
        /* If a stride read pattern is detected, ria_stoff is where the
         * stride read started. Note: for normal read-ahead, the value
         * here is meaningless and is never accessed. */
        pgoff_t ria_stoff;
        /* ria_length and ria_pages are the stride length and the data
         * pages per stride in stride I/O mode. They are also used to
         * check whether the read-ahead pages are being read in stride
         * I/O mode. */
        unsigned long ria_length;
        unsigned long ria_pages;
};

/* LL_HIST_MAX=32 causes an overflow */
#define LL_HIST_MAX 28
#define LL_HIST_START 12 /* buckets start at 2^12 = 4k */
#define LL_PROCESS_HIST_MAX 10
struct per_process_info {
        pid_t pid;
        struct obd_histogram pp_r_hist;
        struct obd_histogram pp_w_hist;
};

/* pp_extents[LL_PROCESS_HIST_MAX] will hold the combined process info */
struct ll_rw_extents_info {
        struct per_process_info pp_extents[LL_PROCESS_HIST_MAX + 1];
};

#define LL_OFFSET_HIST_MAX 100
struct ll_rw_process_info {
        pid_t                rw_pid;
        int                    rw_op;
        loff_t              rw_range_start;
        loff_t              rw_range_end;
        loff_t              rw_last_file_pos;
        loff_t              rw_offset;
        size_t              rw_smallest_extent;
        size_t              rw_largest_extent;
        struct ll_file_data      *rw_last_file;
};

enum stats_track_type {
        STATS_TRACK_ALL = 0,  /* track all processes */
        STATS_TRACK_PID,      /* track process with this pid */
        STATS_TRACK_PPID,     /* track processes with this ppid */
        STATS_TRACK_GID,      /* track processes with this gid */
        STATS_TRACK_LAST,
};

/* flags for sbi->ll_flags */
#define LL_SBI_NOLCK         0x01 /* DLM locking disabled (directio-only) */
#define LL_SBI_CHECKSUM   0x02 /* checksum each page as it's written */
#define LL_SBI_FLOCK         0x04
#define LL_SBI_USER_XATTR       0x08 /* support user xattr */
#define LL_SBI_ACL             0x10 /* support ACL */
#define LL_SBI_RMT_CLIENT       0x40 /* remote client */
#define LL_SBI_MDS_CAPA   0x80 /* support mds capa */
#define LL_SBI_OSS_CAPA  0x100 /* support oss capa */
#define LL_SBI_LOCALFLOCK       0x200 /* Local flocks supported by kernel */
#define LL_SBI_LRU_RESIZE       0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS       0x800 /* lazystatfs mount option */
#define LL_SBI_SOM_PREVIEW     0x1000 /* SOM preview mount option */
#define LL_SBI_32BIT_API       0x2000 /* generate 32 bit inodes. */
#define LL_SBI_64BIT_HASH      0x4000 /* support 64-bits dir hash/offset */
#define LL_SBI_AGL_ENABLED     0x8000 /* enable agl */
#define LL_SBI_VERBOSE  0x10000 /* verbose mount/umount */
#define LL_SBI_LAYOUT_LOCK    0x20000 /* layout lock support */
#define LL_SBI_USER_FID2PATH  0x40000 /* allow fid2path by unprivileged users */
#define LL_SBI_XATTR_CACHE    0x80000 /* support for xattr cache */

#define LL_SBI_FLAGS {  \
        "nolck",        \
        "checksum",     \
        "flock",        \
        "xattr",        \
        "acl",          \
        "???",          \
        "rmt_client",   \
        "mds_capa",     \
        "oss_capa",     \
        "flock",        \
        "lru_resize",   \
        "lazy_statfs",  \
        "som",          \
        "32bit_api",    \
        "64bit_hash",   \
        "agl",          \
        "verbose",      \
        "layout",       \
        "user_fid2path",\
        "xattr",        \
}
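
/*
 * Illustrative sketch (hypothetical fragment, not part of the original
 * header): the strings above are indexed by flag bit position, so a flag
 * could be mapped to its name roughly as follows:
 *
 *      static const char *names[] = LL_SBI_FLAGS;
 *      const char *name = names[ffs(LL_SBI_ACL) - 1];
 *      (LL_SBI_ACL == 0x10, so ffs() == 5 and names[4] == "acl")
 */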

#define RCE_HASHES      32

struct rmtacl_ctl_entry {
        struct list_head       rce_list;
        pid_t       rce_key; /* hash key */
        int           rce_ops; /* acl operation type */
};

struct rmtacl_ctl_table {
        spinlock_t      rct_lock;
        struct list_head        rct_entries[RCE_HASHES];
};

#define EE_HASHES       32

struct eacl_table {
        spinlock_t      et_lock;
        struct list_head        et_entries[EE_HASHES];
};

struct ll_sb_info {
        struct list_head                  ll_list;
        /* this protects pglist and ra_info.  It isn't safe to
         * grab from interrupt contexts */
        spinlock_t                ll_lock;
        spinlock_t                ll_pp_extent_lock; /* pp_extent entry */
        spinlock_t                ll_process_lock; /* ll_rw_process_info */
        struct obd_uuid    ll_sb_uuid;
        struct obd_export       *ll_md_exp;
        struct obd_export       *ll_dt_exp;
        struct proc_dir_entry*    ll_proc_root;
        struct lu_fid        ll_root_fid; /* root object fid */

        int                    ll_flags;
        unsigned int              ll_umounting:1,
                                  ll_xattr_cache_enabled:1;
        struct list_head                ll_conn_chain; /* per-conn chain of SBs */
        struct lustre_client_ocd  ll_lco;

        struct list_head                ll_orphan_dentry_list; /*please don't ask -p*/
        struct ll_close_queue    *ll_lcq;

        struct lprocfs_stats     *ll_stats; /* lprocfs stats counter */

        struct cl_client_cache    ll_cache;

        struct lprocfs_stats     *ll_ra_stats;

        struct ll_ra_info        ll_ra_info;
        unsigned int          ll_namelen;
        struct file_operations   *ll_fop;

        /* =0 - hold lock over whole read/write
         * >0 - max. chunk to be read/written w/o lock re-acquiring */
        unsigned long        ll_max_rw_chunk;
        unsigned int          ll_md_brw_size; /* used by readdir */

        struct lu_site     *ll_site;
        struct cl_device         *ll_cl;
        /* Statistics */
        struct ll_rw_extents_info ll_rw_extents_info;
        int                    ll_extent_process_count;
        struct ll_rw_process_info ll_rw_process_info[LL_PROCESS_HIST_MAX];
        unsigned int          ll_offset_process_count;
        struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
        unsigned int          ll_rw_offset_entry_count;
        int                    ll_stats_track_id;
        enum stats_track_type     ll_stats_track_type;
        int                    ll_rw_stats_on;

        /* metadata stat-ahead */
        unsigned int          ll_sa_max;     /* max statahead RPCs */
        atomic_t                  ll_sa_total;   /* statahead thread started
                                                  * count */
        atomic_t                  ll_sa_wrong;   /* statahead thread stopped for
                                                  * low hit ratio */
        atomic_t                  ll_agl_total;  /* AGL thread started count */

        dev_t                ll_sdev_orig; /* save s_dev before assign for
                                                 * clustered nfs */
        struct rmtacl_ctl_table   ll_rct;
        struct eacl_table        ll_et;
        __kernel_fsid_t           ll_fsid;
};

#define LL_DEFAULT_MAX_RW_CHUNK      (32 * 1024 * 1024)

struct ll_ra_read {
        pgoff_t      lrr_start;
        pgoff_t      lrr_count;
        struct task_struct *lrr_reader;
        struct list_head          lrr_linkage;
};

/*
 * per file-descriptor read-ahead data.
 */
struct ll_readahead_state {
        spinlock_t  ras_lock;
        /*
         * index of the last page that read(2) needed and that wasn't in the
         * cache. Used by ras_update() to detect seeks.
         *
         * XXX nikita: if access seeks into cached region, Lustre doesn't see
         * this.
         */
        unsigned long   ras_last_readpage;
        /*
         * number of pages read after last read-ahead window reset. As window
         * is reset on each seek, this is effectively a number of consecutive
         * accesses. Maybe ->ras_accessed_in_window is better name.
         *
         * XXX nikita: window is also reset (by ras_update()) when Lustre
         * believes that memory pressure evicts read-ahead pages. In that
         * case, it probably doesn't make sense to expand window to
         * PTLRPC_MAX_BRW_PAGES on the third access.
         */
        unsigned long   ras_consecutive_pages;
        /*
         * number of read requests after the last read-ahead window reset.
         * As the window is reset on each seek, this is effectively the
         * number of consecutive read requests and is used to trigger
         * read-ahead.
         */
        unsigned long   ras_consecutive_requests;
        /*
         * Parameters of current read-ahead window. Handled by
         * ras_update(). On the initial access to the file or after a seek,
         * window is reset to 0. After 3 consecutive accesses, window is
         * expanded to PTLRPC_MAX_BRW_PAGES. Afterwards, window is enlarged by
         * PTLRPC_MAX_BRW_PAGES chunks up to ->ra_max_pages.
         */
        unsigned long   ras_window_start, ras_window_len;
        /*
         * Where next read-ahead should start at. This lies within read-ahead
         * window. Read-ahead window is read in pieces rather than at once
         * because: 1. lustre limits total number of pages under read-ahead by
         * ->ra_max_pages (see ll_ra_count_get()), 2. client cannot read pages
         * not covered by DLM lock.
         */
        unsigned long   ras_next_readahead;
        /*
         * Total number of ll_file_read requests issued; reads originating
         * from mmap are not counted in this total. This value is used to
         * trigger full file read-ahead after multiple reads to a small file.
         */
        unsigned long   ras_requests;
        /*
         * Page index with respect to the current request; this value will
         * not be accurate when dealing with reads issued via mmap.
         */
        unsigned long   ras_request_index;
        /*
         * list of struct ll_ra_read's, one per read(2) call currently in
         * progress against this file descriptor. Used by read-ahead code,
         * protected by ->ras_lock.
         */
        struct list_head      ras_read_beads;
        /*
         * The following 3 items are used for detecting the stride I/O
         * mode.
         * In stride I/O mode,
         * ...............|-----data-----|****gap*****|--------|******|....
         *    offset      |-stride_pages-|-stride_gap-|
         * ras_stride_offset = offset;
         * ras_stride_length = stride_pages + stride_gap;
         * ras_stride_pages = stride_pages;
         * Note: all three items are counted in pages.
         */
        unsigned long   ras_stride_length;
        unsigned long   ras_stride_pages;
        pgoff_t  ras_stride_offset;
        /*
         * number of consecutive stride requests; similar to
         * ras_consecutive_requests, but used for stride I/O mode.
         * Note: stride read-ahead is enabled only after more than 2
         * consecutive stride requests are detected.
         */
        unsigned long   ras_consecutive_stride_requests;
};
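
/*
 * Illustrative example (hypothetical numbers, not part of the original
 * header): for a detected stride pattern that reads 16 data pages every
 * 64 pages starting at page 128, the fields above would hold
 *
 *      ras_stride_offset = 128;   where the stride read started
 *      ras_stride_pages  = 16;    data pages per stride
 *      ras_stride_length = 64;    stride_pages + stride_gap
 *
 * so the n-th data chunk begins at page 128 + n * 64.
 */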

extern struct kmem_cache *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
        struct ll_readahead_state fd_ras;
        struct ccc_grouplock fd_grouplock;
        __u64 lfd_pos;
        __u32 fd_flags;
        fmode_t fd_omode;
        /* open handle if a lease exists for this file.
         * Borrows lli->lli_och_mutex to protect assignment */
        struct obd_client_handle *fd_lease_och;
        struct obd_client_handle *fd_och;
        struct file *fd_file;
        /* Indicates whether a failure needs to be reported on close.
         * true: the failure is already known, do not report it again.
         * false: unknown failure, should be reported. */
        bool fd_write_failed;
};

struct lov_stripe_md;

extern spinlock_t inode_lock;

extern struct proc_dir_entry *proc_lustre_fs_root;

static inline struct inode *ll_info2i(struct ll_inode_info *lli)
{
        return &lli->lli_vfs_inode;
}

__u32 ll_i2suppgid(struct inode *i);
void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2);

static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
{
#if BITS_PER_LONG == 32
        return 1;
#elif defined(CONFIG_COMPAT)
        return unlikely(is_compat_task() || (sbi->ll_flags & LL_SBI_32BIT_API));
#else
        return unlikely(sbi->ll_flags & LL_SBI_32BIT_API);
#endif
}
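
/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * header): a typical use of ll_need_32bit_api() is choosing which
 * end-of-dir marker to expose to userspace:
 *
 *      __u64 eof = ll_need_32bit_api(sbi) ? LL_DIR_END_OFF_32BIT
 *                                         : LL_DIR_END_OFF;
 */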

void ll_ra_read_in(struct file *f, struct ll_ra_read *rar);
void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar);
struct ll_ra_read *ll_ra_read_get(struct file *f);

/* llite/lproc_llite.c */
#if defined(CONFIG_PROC_FS)
int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                                struct super_block *sb, char *osc, char *mdc);
void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count);
void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars);
void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                       struct ll_file_data *file, loff_t pos,
                       size_t count, int rw);
#else
static inline int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                        struct super_block *sb, char *osc, char *mdc){return 0;}
static inline void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) {}
static inline
void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count) {}
static inline void lprocfs_llite_init_vars(struct lprocfs_static_vars *lvars)
{
        memset(lvars, 0, sizeof(*lvars));
}
static inline void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
                                     struct ll_file_data *file, loff_t pos,
                                     size_t count, int rw) {}
#endif

/* llite/dir.c */
void ll_release_page(struct page *page, int remove);
extern const struct file_operations ll_dir_operations;
extern const struct inode_operations ll_dir_inode_operations;
struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
                             struct ll_dir_chain *chain);
int ll_dir_read(struct inode *inode, struct dir_context *ctx);

int ll_get_mdt_idx(struct inode *inode);
/* llite/namei.c */
extern const struct inode_operations ll_special_inode_operations;

int ll_objects_destroy(struct ptlrpc_request *request,
                       struct inode *dir);
struct inode *ll_iget(struct super_block *sb, ino_t hash,
                      struct lustre_md *lic);
int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
                       void *data, int flag);
struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
int ll_rmdir_entry(struct inode *dir, char *name, int namelen);

/* llite/rw.c */
int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page, struct writeback_control *wbc);
int ll_writepages(struct address_space *, struct writeback_control *wbc);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
int ll_readahead(const struct lu_env *env, struct cl_io *io,
                 struct ll_readahead_state *ras, struct address_space *mapping,
                 struct cl_page_list *queue, int flags);

#ifndef MS_HAS_NEW_AOPS
extern const struct address_space_operations ll_aops;
#else
extern const struct address_space_operations_ext ll_aops;
#endif

/* llite/file.c */
extern struct file_operations ll_file_operations;
extern struct file_operations ll_file_operations_flock;
extern struct file_operations ll_file_operations_noflock;
extern struct inode_operations ll_file_inode_operations;
extern int ll_have_md_lock(struct inode *inode, __u64 *bits,
                           ldlm_mode_t l_req_mode);
extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
                                   struct lustre_handle *lockh, __u64 flags,
                                   ldlm_mode_t mode);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
int ll_glimpse_ioctl(struct ll_sb_info *sbi,
                     struct lov_stripe_md *lsm, lstat_t *st);
void ll_ioepoch_open(struct ll_inode_info *lli, __u64 ioepoch);
int ll_release_openhandle(struct inode *, struct lookup_intent *);
int ll_md_real_close(struct inode *inode, fmode_t fmode);
void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
                      struct obd_client_handle **och, unsigned long flags);
void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data);
int ll_som_update(struct inode *inode, struct md_op_data *op_data);
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
                     __u64 ioepoch, int sync);
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
                          struct lustre_handle *fh);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
struct posix_acl *ll_get_acl(struct inode *inode, int type);

int ll_inode_permission(struct inode *inode, int mask);

int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
                             int flags, struct lov_user_md *lum,
                             int lum_size);
int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
                             struct lov_mds_md **lmm, int *lmm_size,
                             struct ptlrpc_request **request);
int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
                     int set_default);
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
                     int *lmm_size, struct ptlrpc_request **request);
int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
int ll_merge_lvb(const struct lu_env *env, struct inode *inode);
int ll_fid2path(struct inode *inode, void __user *arg);
int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
int ll_hsm_release(struct inode *inode);

/* llite/dcache.c */

int ll_d_init(struct dentry *de);
extern const struct dentry_operations ll_d_ops;
void ll_intent_drop_lock(struct lookup_intent *);
void ll_intent_release(struct lookup_intent *);
void ll_invalidate_aliases(struct inode *);
void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode);
int ll_revalidate_it_finish(struct ptlrpc_request *request,
                            struct lookup_intent *it, struct inode *inode);

/* llite/llite_lib.c */
extern struct super_operations lustre_super_operations;

void ll_lli_init(struct ll_inode_info *lli);
int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
void ll_put_super(struct super_block *sb);
void ll_kill_super(struct super_block *sb);
struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import);
int ll_setattr(struct dentry *de, struct iattr *attr);
int ll_statfs(struct dentry *de, struct kstatfs *sfs);
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
                       __u64 max_age, __u32 flags);
void ll_update_inode(struct inode *inode, struct lustre_md *md);
void ll_read_inode2(struct inode *inode, void *opaque);
void ll_delete_inode(struct inode *inode);
int ll_iocontrol(struct inode *inode, struct file *file,
                 unsigned int cmd, unsigned long arg);
int ll_flush_ctx(struct inode *inode);
void ll_umount_begin(struct super_block *sb);
int ll_remount_fs(struct super_block *sb, int *flags, char *data);
int ll_show_options(struct seq_file *seq, struct dentry *dentry);
void ll_dirty_page_discard_warn(struct page *page, int ioret);
int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                  struct super_block *, struct lookup_intent *);
int ll_obd_statfs(struct inode *inode, void *arg);
int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize);
int ll_get_max_cookiesize(struct ll_sb_info *sbi, int *max_cookiesize);
int ll_get_default_cookiesize(struct ll_sb_info *sbi, int *default_cookiesize);
int ll_process_config(struct lustre_cfg *lcfg);
struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                                      struct inode *i1, struct inode *i2,
                                      const char *name, int namelen,
                                      int mode, __u32 opc, void *data);
void ll_finish_md_op_data(struct md_op_data *op_data);
int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);

/* llite/llite_nfs.c */
extern struct export_operations lustre_export_operations;
__u32 get_uuid2int(const char *name, int len);
void get_uuid2fsid(const char *name, int len, __kernel_fsid_t *fsid);
struct inode *search_inode_for_lustre(struct super_block *sb,
                                      const struct lu_fid *fid);

/* llite/symlink.c */
extern struct inode_operations ll_fast_symlink_inode_operations;

/* llite/llite_close.c */
struct ll_close_queue {
        spinlock_t              lcq_lock;
        struct list_head                lcq_head;
        wait_queue_head_t               lcq_waitq;
        struct completion       lcq_comp;
        atomic_t                lcq_stop;
};

struct ccc_object *cl_inode2ccc(struct inode *inode);

void vvp_write_pending(struct ccc_object *club, struct ccc_page *page);
void vvp_write_complete(struct ccc_object *club, struct ccc_page *page);

/* specific architecture can implement only part of this list */
enum vvp_io_subtype {
        /** normal IO */
        IO_NORMAL,
        /** io started from splice_{read|write} */
        IO_SPLICE
};

/* IO subtypes */
struct vvp_io {
        /** io subtype */
        enum vvp_io_subtype    cui_io_subtype;

        union {
                struct {
                        struct pipe_inode_info *cui_pipe;
                        unsigned int        cui_flags;
                } splice;
                struct vvp_fault_io {
                        /**
                         * Inode modification time that is checked across DLM
                         * lock request.
                         */
                        time_t           ft_mtime;
                        struct vm_area_struct *ft_vma;
                        /**
                         *  locked page returned from vvp_io
                         */
                        struct page         *ft_vmpage;
                        struct vm_fault_api {
                                /**
                                 * kernel fault info
                                 */
                                struct vm_fault *ft_vmf;
                                /**
                                 * fault API used bitflags for return code.
                                 */
                                unsigned int    ft_flags;
                                /**
                                 * check that flags are from filemap_fault
                                 */
                                bool            ft_flags_valid;
                        } fault;
                } fault;
        } u;
        /**
         * Read-ahead state used by read and page-fault IO contexts.
         */
        struct ll_ra_read    cui_bead;
        /**
         * Set when cui_bead has been initialized.
         */
        int               cui_ra_window_set;
};

/**
 * IO arguments for various VFS I/O interfaces.
 */
struct vvp_io_args {
        /** normal/splice */
        enum vvp_io_subtype via_io_subtype;

        union {
                struct {
                        struct kiocb      *via_iocb;
                        struct iov_iter   *via_iter;
                } normal;
                struct {
                        struct pipe_inode_info  *via_pipe;
                        unsigned int       via_flags;
                } splice;
        } u;
};

struct ll_cl_context {
        void       *lcc_cookie;
        struct cl_io   *lcc_io;
        struct cl_page *lcc_page;
        struct lu_env  *lcc_env;
        int          lcc_refcheck;
};

struct vvp_thread_info {
        struct vvp_io_args   vti_args;
        struct ra_io_arg     vti_ria;
        struct ll_cl_context vti_io_ctx;
};

static inline struct vvp_thread_info *vvp_env_info(const struct lu_env *env)
{
        extern struct lu_context_key vvp_key;
        struct vvp_thread_info      *info;

        info = lu_context_key_get(&env->le_ctx, &vvp_key);
        LASSERT(info != NULL);
        return info;
}

static inline struct vvp_io_args *vvp_env_args(const struct lu_env *env,
                                               enum vvp_io_subtype type)
{
        struct vvp_io_args *ret = &vvp_env_info(env)->vti_args;

        ret->via_io_subtype = type;

        return ret;
}
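
/*
 * Illustrative sketch (hypothetical fragment, not part of the original
 * header): a normal read/write path would fill the per-thread args as
 *
 *      struct vvp_io_args *args = vvp_env_args(env, IO_NORMAL);
 *
 *      args->u.normal.via_iocb = iocb;
 *      args->u.normal.via_iter = iter;
 *
 * while a splice path would select IO_SPLICE and fill u.splice instead.
 */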

struct vvp_session {
        struct vvp_io    vs_ios;
};

static inline struct vvp_session *vvp_env_session(const struct lu_env *env)
{
        extern struct lu_context_key vvp_session_key;
        struct vvp_session *ses;

        ses = lu_context_key_get(env->le_ses, &vvp_session_key);
        LASSERT(ses != NULL);
        return ses;
}

static inline struct vvp_io *vvp_env_io(const struct lu_env *env)
{
        return &vvp_env_session(env)->vs_ios;
}

int vvp_global_init(void);
void vvp_global_fini(void);

void ll_queue_done_writing(struct inode *inode, unsigned long flags);
void ll_close_thread_shutdown(struct ll_close_queue *lcq);
int ll_close_thread_start(struct ll_close_queue **lcq_ret);

/* llite/llite_mmap.c */

int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last);
int ll_file_mmap(struct file *file, struct vm_area_struct *vma);
void policy_from_vma(ldlm_policy_data_t *policy,
                struct vm_area_struct *vma, unsigned long addr, size_t count);
struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
                               size_t count);

static inline void ll_invalidate_page(struct page *vmpage)
{
        struct address_space *mapping = vmpage->mapping;
        loff_t offset = vmpage->index << PAGE_CACHE_SHIFT;

        LASSERT(PageLocked(vmpage));
        if (mapping == NULL)
                return;

        ll_teardown_mmaps(mapping, offset, offset + PAGE_CACHE_SIZE);
        truncate_complete_page(mapping, vmpage);
}

#define    ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)

/* don't need an addref as the sb_info should be holding one */
static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
{
        return ll_s2sbi(sb)->ll_dt_exp;
}

/* don't need an addref as the sb_info should be holding one */
static inline struct obd_export *ll_s2mdexp(struct super_block *sb)
{
        return ll_s2sbi(sb)->ll_md_exp;
}

static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi)
{
        struct obd_device *obd = sbi->ll_md_exp->exp_obd;

        if (obd == NULL)
                LBUG();
        return &obd->u.cli;
}

/* FIXME: replace the name of this with LL_SB to conform to kernel stuff */
static inline struct ll_sb_info *ll_i2sbi(struct inode *inode)
{
        return ll_s2sbi(inode->i_sb);
}

static inline struct obd_export *ll_i2dtexp(struct inode *inode)
{
        return ll_s2dtexp(inode->i_sb);
}

static inline struct obd_export *ll_i2mdexp(struct inode *inode)
{
        return ll_s2mdexp(inode->i_sb);
}

static inline struct lu_fid *ll_inode2fid(struct inode *inode)
{
        struct lu_fid *fid;

        LASSERT(inode != NULL);
        fid = &ll_i2info(inode)->lli_fid;

        return fid;
}

static inline __u64 ll_file_maxbytes(struct inode *inode)
{
        return ll_i2info(inode)->lli_maxbytes;
}

/* llite/xattr.c */
int ll_setxattr(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags);
ssize_t ll_getxattr(struct dentry *dentry, const char *name,
                    void *buffer, size_t size);
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size);
int ll_removexattr(struct dentry *dentry, const char *name);

/* llite/remote_perm.c */
extern struct kmem_cache *ll_remote_perm_cachep;
extern struct kmem_cache *ll_rmtperm_hash_cachep;

void free_rmtperm_hash(struct hlist_head *hash);
int ll_update_remote_perm(struct inode *inode, struct mdt_remote_perm *perm);
int lustre_check_remote_perm(struct inode *inode, int mask);

/* llite/llite_capa.c */
extern struct timer_list ll_capa_timer;

int ll_capa_thread_start(void);
void ll_capa_thread_stop(void);
void ll_capa_timer_callback(unsigned long unused);

struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa);

void ll_capa_open(struct inode *inode);
void ll_capa_close(struct inode *inode);

struct obd_capa *ll_mdscapa_get(struct inode *inode);
struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc);

void ll_truncate_free_capa(struct obd_capa *ocapa);
void ll_clear_inode_capas(struct inode *inode);
void ll_print_capa_stat(struct ll_sb_info *sbi);

/* llite/llite_cl.c */
extern struct lu_device_type vvp_device_type;

/**
 * Common IO arguments for various VFS I/O interfaces.
 */
int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
void ll_io_init(struct cl_io *io, const struct file *file, int write);

void ras_update(struct ll_sb_info *sbi, struct inode *inode,
                struct ll_readahead_state *ras, unsigned long index,
                unsigned hit);
void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len);
void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which);

/* llite/llite_rmtacl.c */
#ifdef CONFIG_FS_POSIX_ACL
struct eacl_entry {
        struct list_head            ee_list;
        pid_t            ee_key; /* hash key */
        struct lu_fid    ee_fid;
        int                ee_type; /* ACL type for ACCESS or DEFAULT */
        ext_acl_xattr_header *ee_acl;
};

u64 rce_ops2valid(int ops);
struct rmtacl_ctl_entry *rct_search(struct rmtacl_ctl_table *rct, pid_t key);
int rct_add(struct rmtacl_ctl_table *rct, pid_t key, int ops);
int rct_del(struct rmtacl_ctl_table *rct, pid_t key);
void rct_init(struct rmtacl_ctl_table *rct);
void rct_fini(struct rmtacl_ctl_table *rct);

void ee_free(struct eacl_entry *ee);
int ee_add(struct eacl_table *et, pid_t key, struct lu_fid *fid, int type,
           ext_acl_xattr_header *header);
struct eacl_entry *et_search_del(struct eacl_table *et, pid_t key,
                                 struct lu_fid *fid, int type);
void et_search_free(struct eacl_table *et, pid_t key);
void et_init(struct eacl_table *et);
void et_fini(struct eacl_table *et);
#else
static inline u64 rce_ops2valid(int ops)
{
        return 0;
}
#endif

/* statahead.c */

#define LL_SA_RPC_MIN      2
#define LL_SA_RPC_DEF      32
#define LL_SA_RPC_MAX      8192

#define LL_SA_CACHE_BIT  5
#define LL_SA_CACHE_SIZE        (1 << LL_SA_CACHE_BIT)
#define LL_SA_CACHE_MASK        (LL_SA_CACHE_SIZE - 1)
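
/*
 * Illustrative sketch (hypothetical fragment, not part of the original
 * header): a statahead entry hashes into one of the LL_SA_CACHE_SIZE
 * buckets via the mask, e.g.
 *
 *      int bucket = name_hash & LL_SA_CACHE_MASK;
 *      ... walk sai->sai_cache[bucket] under sai_cache_lock[bucket] ...
 */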

/* per inode struct, for dir only */
struct ll_statahead_info {
        struct inode       *sai_inode;
        atomic_t            sai_refcount;   /* hold a refcount while
                                                 * accessing this struct */
        unsigned int        sai_generation; /* generation for statahead */
        unsigned int        sai_max;    /* max ahead of lookup */
        __u64              sai_sent;       /* stat requests sent count */
        __u64              sai_replied;    /* stat requests which received
                                                 * a reply */
        __u64              sai_index;      /* index of statahead entry */
        __u64              sai_index_wait; /* index of the entry which the
                                                 * caller is waiting for */
        __u64              sai_hit;     /* hit count */
        __u64              sai_miss;       /* miss count:
                                                 * for "ls -al" case, it includes
                                                 * hidden dentry misses;
                                                 * for "ls -l" case, it does not
                                                 * include hidden dentry misses.
                                                 * "sai_miss_hidden" is used for
                                                 * the latter case.
                                                 */
        unsigned int        sai_consecutive_miss; /* consecutive miss */
        unsigned int        sai_miss_hidden;/* "ls -al", but first dentry
                                                 * is not a hidden one */
        unsigned int        sai_skip_hidden;/* skipped hidden dentry count */
        unsigned int        sai_ls_all:1,   /* "ls -al", do stat-ahead for
                                                 * hidden entries */
                                sai_agl_valid:1;/* AGL is valid for the dir */
        wait_queue_head_t            sai_waitq;      /* stat-ahead wait queue */
        struct ptlrpc_thread    sai_thread;     /* stat-ahead thread */
        struct ptlrpc_thread    sai_agl_thread; /* AGL thread */
        struct list_head              sai_entries;    /* entry list */
        struct list_head              sai_entries_received; /* entries returned */
        struct list_head              sai_entries_stated;   /* entries stated */
        struct list_head              sai_entries_agl; /* AGL entries to be sent */
        struct list_head              sai_cache[LL_SA_CACHE_SIZE];
        spinlock_t              sai_cache_lock[LL_SA_CACHE_SIZE];
        atomic_t                sai_cache_count; /* entry count in cache */
};

int do_statahead_enter(struct inode *dir, struct dentry **dentry,
                       int only_unplug);
void ll_stop_statahead(struct inode *dir, void *key);

static inline int ll_glimpse_size(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        int rc;

        down_read(&lli->lli_glimpse_sem);
        rc = cl_glimpse_size(inode);
        lli->lli_glimpse_time = cfs_time_current();
        up_read(&lli->lli_glimpse_sem);
        return rc;
}

static inline void
ll_statahead_mark(struct inode *dir, struct dentry *dentry)
{
        struct ll_inode_info     *lli = ll_i2info(dir);
        struct ll_statahead_info *sai = lli->lli_sai;
        struct ll_dentry_data    *ldd = ll_d2d(dentry);

        /* not the same process, don't mark */
        if (lli->lli_opendir_pid != current_pid())
                return;

        LASSERT(ldd != NULL);
        if (sai != NULL)
                ldd->lld_sa_generation = sai->sai_generation;
}

static inline int
d_need_statahead(struct inode *dir, struct dentry *dentryp)
{
        struct ll_inode_info  *lli;
        struct ll_dentry_data *ldd;

        if (ll_i2sbi(dir)->ll_sa_max == 0)
                return -EAGAIN;

        lli = ll_i2info(dir);
        /* not the same process, don't statahead */
        if (lli->lli_opendir_pid != current_pid())
                return -EAGAIN;

        /* statahead has been stopped */
        if (lli->lli_opendir_key == NULL)
                return -EAGAIN;

        ldd = ll_d2d(dentryp);
        /*
         * When a dentry is stat'ed, the system triggers "revalidate" or
         * "lookup" more than once: for "getattr", for "getxattr", and maybe
         * for others. Under patchless client mode, the operation intent is
         * not accurate, which may misguide the statahead thread. For example:
         * the "revalidate" calls for "getattr" and "getxattr" of a dentry may
         * have the same operation intent -- "IT_GETATTR".
         * In fact, one dentry should have only one chance to interact with
         * the statahead thread, otherwise the statahead window will be
         * confused. The solution is as follows:
         * assign "lld_sa_generation" from "sai_generation" when a dentry is
         * "IT_GETATTR"ed for the first time; any subsequent "IT_GETATTR"
         * will bypass interacting with the statahead thread by checking
         * "lld_sa_generation == lli_sai->sai_generation".
         */
        if (ldd && lli->lli_sai &&
            ldd->lld_sa_generation == lli->lli_sai->sai_generation)
                return -EAGAIN;

        return 1;
}

static inline int
ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
{
        int ret;

        ret = d_need_statahead(dir, *dentryp);
        if (ret <= 0)
                return ret;

        return do_statahead_enter(dir, dentryp, only_unplug);
}
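
/*
 * Illustrative sketch (hypothetical caller, not part of the original
 * header): a lookup/revalidate path would use the wrapper above as
 *
 *      rc = ll_statahead_enter(dir, &dentry, 0);
 *      if (rc <= 0)
 *              ... statahead does not apply, fall back to normal lookup ...
 */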

/* llite ioctl register support routine */
enum llioc_iter {
        LLIOC_CONT = 0,
        LLIOC_STOP
};

#define LLIOC_MAX_CMD      256

/*
 * Rules to write a callback function:
 *
 * Parameters:
 *  @magic: The dynamic ioctl call routine will feed this value with the
 *      pointer returned by ll_iocontrol_register. Callback functions should
 *      use this data to check for a potential collision of ioctl cmds. If a
 *      collision is found, the callback function should return LLIOC_CONT.
 *  @rcp: The result of the ioctl command.
 *
 *  Return values:
 *      If @magic matches the pointer returned by ll_iocontrol_register, the
 *      callback should return LLIOC_STOP; return LLIOC_CONT otherwise.
 */
typedef enum llioc_iter (*llioc_callback_t)(struct inode *inode,
                struct file *file, unsigned int cmd, unsigned long arg,
                void *magic, int *rcp);
1311
1312/* export functions */
/* Register an ioctl block dynamically for a regular file.
 *
 * @cmd: the array of ioctl commands
 * @count: number of commands in @cmd
 * @cb: callback function; it will be called if an ioctl command is found to
 *      belong to the command list @cmd.
 *
 * Return value:
 *      A magic pointer will be returned on success;
 *      otherwise, NULL will be returned.
 */
void *ll_iocontrol_register(llioc_callback_t cb, int count, unsigned int *cmd);
void ll_iocontrol_unregister(void *magic);
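
/*
 * Illustrative sketch (not part of this header): registering a private
 * ioctl handler.  The command value MY_EXAMPLE_IOC, the handle
 * my_ioc_handle, the callback my_ioc_cb() and my_ioc_init() are all
 * hypothetical; only the types and the register/unregister functions above
 * come from this file.
 */
#if 0
static void *my_ioc_handle;                             /* hypothetical */
static unsigned int my_ioc_cmds[] = { MY_EXAMPLE_IOC }; /* hypothetical */

static enum llioc_iter my_ioc_cb(struct inode *inode, struct file *file,
                                 unsigned int cmd, unsigned long arg,
                                 void *magic, int *rcp)
{
        /* cmd collision with another registered block: let it handle this */
        if (magic != my_ioc_handle)
                return LLIOC_CONT;

        *rcp = 0;               /* result of handling the command */
        return LLIOC_STOP;      /* this ioctl has been consumed */
}

static int my_ioc_init(void)
{
        my_ioc_handle = ll_iocontrol_register(my_ioc_cb, 1, my_ioc_cmds);
        return my_ioc_handle ? 0 : -ENOMEM;
}
#endif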


/* lclient compat stuff */
#define cl_inode_info ll_inode_info
#define cl_i2info(info) ll_i2info(info)
#define cl_inode_mode(inode) ((inode)->i_mode)
#define cl_i2sbi ll_i2sbi

static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
                                               const struct iattr *attr)
{
        LASSERT(attr->ia_valid & ATTR_FILE);
        return LUSTRE_FPRIVATE(attr->ia_file);
}

static inline void cl_isize_lock(struct inode *inode)
{
        ll_inode_size_lock(inode);
}

static inline void cl_isize_unlock(struct inode *inode)
{
        ll_inode_size_unlock(inode);
}

static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
{
        LASSERT(mutex_is_locked(&ll_i2info(inode)->lli_size_mutex));
        i_size_write(inode, kms);
}

static inline void cl_isize_write(struct inode *inode, loff_t kms)
{
        ll_inode_size_lock(inode);
        i_size_write(inode, kms);
        ll_inode_size_unlock(inode);
}

#define cl_isize_read(inode)         i_size_read(inode)

static inline int cl_merge_lvb(const struct lu_env *env, struct inode *inode)
{
        return ll_merge_lvb(env, inode);
}

#define cl_inode_atime(inode) LTIME_S((inode)->i_atime)
#define cl_inode_ctime(inode) LTIME_S((inode)->i_ctime)
#define cl_inode_mtime(inode) LTIME_S((inode)->i_mtime)

struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt);

int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
                       enum cl_fsync_mode mode, int ignore_layout);

/** direct write pages */
struct ll_dio_pages {
        /** page array to be written. we don't support
         * partial pages except the last one. */
        struct page **ldp_pages;
        /* offset of each page */
        loff_t       *ldp_offsets;
        /** if ldp_offsets is NULL, the pages are sequential,
         * and this is the file offset of the first page. */
        loff_t  ldp_start_offset;
        /** how many bytes are to be written. */
        size_t  ldp_size;
        /** # of pages in the array. */
        int        ldp_nr;
};

static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
                                  int rc)
{
        int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
                                      LPROC_LL_OSC_WRITE;

        ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
}

extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
                                  int rw, struct inode *inode,
                                  struct ll_dio_pages *pv);
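
/*
 * Illustrative sketch (not part of this header): describing a sequential
 * run of full pages for direct I/O.  The function and its parameters are
 * hypothetical and assumed to be set up by the caller; only struct
 * ll_dio_pages and ll_direct_rw_pages() come from this file.
 */
#if 0
static ssize_t ll_dio_write_example(const struct lu_env *env, struct cl_io *io,
                                    struct inode *inode, struct page **pages,
                                    int npages, loff_t file_offset)
{
        struct ll_dio_pages pv = {
                .ldp_pages        = pages,       /* full pages; only the last
                                                  * page may be partial */
                .ldp_offsets      = NULL,        /* NULL => sequential run */
                .ldp_start_offset = file_offset, /* offset of the first page */
                .ldp_size         = (size_t)npages << PAGE_SHIFT,
                .ldp_nr           = npages,
        };

        return ll_direct_rw_pages(env, io, WRITE, inode, &pv);
}
#endif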

static inline int ll_file_nolock(const struct file *file)
{
        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
        struct inode *inode = file_inode(file);

        LASSERT(fd != NULL);
        return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
                (ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
}

static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
                                    struct lookup_intent *it, __u64 *bits)
{
        if (!it->d.lustre.it_lock_set) {
                struct lustre_handle handle;

                /* If this inode is a remote object, it will get two
                 * separate locks in different namespaces: the master MDT,
                 * where the name entry is, will grant a LOOKUP lock, and
                 * the remote MDT, where the object is, will grant an
                 * UPDATE|PERM lock. The inode will be attached to both
                 * the LOOKUP and the PERM locks, so revoking either lock
                 * will cause the dcache to be cleared. */
                if (it->d.lustre.it_remote_lock_mode) {
                        handle.cookie = it->d.lustre.it_remote_lock_handle;
                        CDEBUG(D_DLMTRACE, "setting l_data to inode %p(%lu/%u) for remote lock %#llx\n",
                               inode,
                               inode->i_ino, inode->i_generation,
                               handle.cookie);
                        md_set_lock_data(exp, &handle.cookie, inode, NULL);
                }

                handle.cookie = it->d.lustre.it_lock_handle;

                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u) for lock %#llx\n",
                       inode, inode->i_ino,
                       inode->i_generation, handle.cookie);

                md_set_lock_data(exp, &handle.cookie, inode,
                                 &it->d.lustre.it_lock_bits);
                it->d.lustre.it_lock_set = 1;
        }

        if (bits != NULL)
                *bits = it->d.lustre.it_lock_bits;
}
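
/*
 * Illustrative sketch (not part of this header): a lookup-completion path
 * would typically attach the intent locks to the new inode and then
 * revalidate the dentry if a LOOKUP lock was granted.  The surrounding
 * function and variables are hypothetical; only ll_set_lock_data() and
 * d_lustre_revalidate() (defined below) come from this file.
 */
#if 0
static void ll_lookup_finish_example(struct ll_sb_info *sbi,
                                     struct inode *inode,
                                     struct lookup_intent *it,
                                     struct dentry *dentry)
{
        __u64 bits = 0;

        /* Attach the granted MD locks to the inode and fetch the bits. */
        ll_set_lock_data(sbi->ll_md_exp, inode, it, &bits);

        /* A granted LOOKUP lock means the name entry is valid again. */
        if (bits & MDS_INODELOCK_LOOKUP)
                d_lustre_revalidate(dentry);
}
#endif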

static inline void ll_lock_dcache(struct inode *inode)
{
        spin_lock(&inode->i_lock);
}

static inline void ll_unlock_dcache(struct inode *inode)
{
        spin_unlock(&inode->i_lock);
}

static inline int d_lustre_invalid(const struct dentry *dentry)
{
        struct ll_dentry_data *lld = ll_d2d(dentry);

        return (lld == NULL) || lld->lld_invalid;
}

static inline void __d_lustre_invalidate(struct dentry *dentry)
{
        struct ll_dentry_data *lld = ll_d2d(dentry);

        if (lld != NULL)
                lld->lld_invalid = 1;
}

/*
 * Mark the dentry INVALID; if the dentry refcount is zero (as is normally
 * the case for ll_md_blocking_ast), unhash the dentry and let the dcache
 * reclaim it later; otherwise, the dput() that drops the last refcount will
 * unhash the dentry and kill it.
 */
static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
{
        CDEBUG(D_DENTRY, "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
               dentry, dentry,
               dentry->d_parent, d_inode(dentry), d_count(dentry));

        spin_lock_nested(&dentry->d_lock,
                         nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
        __d_lustre_invalidate(dentry);
        if (d_count(dentry) == 0)
                __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
}

static inline void d_lustre_revalidate(struct dentry *dentry)
{
        spin_lock(&dentry->d_lock);
        LASSERT(ll_d2d(dentry) != NULL);
        ll_d2d(dentry)->lld_invalid = 0;
        spin_unlock(&dentry->d_lock);
}
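
/*
 * Illustrative sketch (not part of this header): a lock-revocation path
 * such as ll_md_blocking_ast() would typically walk the aliases of the
 * inode and invalidate each dentry.  The loop below is a simplified,
 * hypothetical sketch; only d_lustre_invalidate() and the dcache lock
 * helpers above come from this file.
 */
#if 0
static void ll_invalidate_aliases_example(struct inode *inode)
{
        struct dentry *dentry;

        /* Walk all dentries pointing at this inode and mark them invalid. */
        ll_lock_dcache(inode);
        hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias)
                d_lustre_invalidate(dentry, 0);
        ll_unlock_dcache(inode);
}
#endif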

enum {
        LL_LAYOUT_GEN_NONE  = ((__u32)-2),      /* layout lock was cancelled */
        LL_LAYOUT_GEN_EMPTY = ((__u32)-1)       /* for empty layout */
};

int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
int ll_layout_refresh(struct inode *inode, __u32 *gen);
int ll_layout_restore(struct inode *inode);
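
/*
 * Illustrative sketch (not part of this header): an I/O path would
 * typically make sure a usable layout is held before starting, and restart
 * under a fresh layout generation if the layout changes mid-I/O.  The
 * control flow and do_io_under_layout() are hypothetical; only
 * ll_layout_refresh() and the generation constants above come from this
 * file.
 */
#if 0
static int ll_io_with_layout_example(struct inode *inode)
{
        __u32 gen = LL_LAYOUT_GEN_NONE;
        int rc;

        /* Acquire the layout lock and learn the current generation. */
        rc = ll_layout_refresh(inode, &gen);
        if (rc != 0)
                return rc;

        /* If the server switches the layout, the layout lock is cancelled
         * and the I/O should be restarted with ll_layout_refresh() again. */
        return do_io_under_layout(inode, gen);  /* hypothetical */
}
#endif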

int ll_xattr_init(void);
void ll_xattr_fini(void);

#endif /* LLITE_INTERNAL_H */
