/* linux/fs/orangefs/orangefs-kernel.h */

/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and
 *  accessed through the Linux VFS (i.e. using standard I/O system calls).
 *  This support is only needed on clients that wish to mount the file system.
 *
 */

/*
 *  Declarations and macros for the ORANGEFS Linux kernel support.
 */
  17
  18#ifndef __ORANGEFSKERNEL_H
  19#define __ORANGEFSKERNEL_H
  20
  21#include <linux/kernel.h>
  22#include <linux/moduleparam.h>
  23#include <linux/statfs.h>
  24#include <linux/backing-dev.h>
  25#include <linux/device.h>
  26#include <linux/mpage.h>
  27#include <linux/namei.h>
  28#include <linux/errno.h>
  29#include <linux/init.h>
  30#include <linux/module.h>
  31#include <linux/slab.h>
  32#include <linux/types.h>
  33#include <linux/fs.h>
  34#include <linux/vmalloc.h>
  35
  36#include <linux/aio.h>
  37#include <linux/posix_acl.h>
  38#include <linux/posix_acl_xattr.h>
  39#include <linux/compat.h>
  40#include <linux/mount.h>
  41#include <linux/uaccess.h>
  42#include <linux/atomic.h>
  43#include <linux/uio.h>
  44#include <linux/sched.h>
  45#include <linux/mm.h>
  46#include <linux/wait.h>
  47#include <linux/dcache.h>
  48#include <linux/pagemap.h>
  49#include <linux/poll.h>
  50#include <linux/rwsem.h>
  51#include <linux/xattr.h>
  52#include <linux/exportfs.h>
  53
  54#include <asm/unaligned.h>
  55
  56#include "orangefs-dev-proto.h"
  57
  58#ifdef ORANGEFS_KERNEL_DEBUG
  59#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       10
  60#else
  61#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
  62#endif
  63
  64#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30
  65
  66#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS     900      /* 15 minutes */
  67
  68#define ORANGEFS_REQDEVICE_NAME          "pvfs2-req"
  69
  70#define ORANGEFS_DEVREQ_MAGIC             0x20030529
  71#define ORANGEFS_LINK_MAX                 0x000000FF
  72#define ORANGEFS_PURGE_RETRY_COUNT     0x00000005
  73#define ORANGEFS_MAX_NUM_OPTIONS          0x00000004
  74#define ORANGEFS_MAX_MOUNT_OPT_LEN        0x00000080
  75#define ORANGEFS_MAX_FSKEY_LEN            64
  76
  77#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) +   \
  78sizeof(__u64) + sizeof(struct orangefs_upcall_s))
  79#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \
  80sizeof(__u64) + sizeof(struct orangefs_downcall_s))
  81
  82/*
  83 * valid orangefs kernel operation states
  84 *
  85 * unknown  - op was just initialized
  86 * waiting  - op is on request_list (upward bound)
  87 * inprogr  - op is in progress (waiting for downcall)
  88 * serviced - op has matching downcall; ok
  89 * purged   - op has to start a timer since client-core
  90 *            exited uncleanly before servicing op
  91 * given up - submitter has given up waiting for it
  92 */
  93enum orangefs_vfs_op_states {
  94        OP_VFS_STATE_UNKNOWN = 0,
  95        OP_VFS_STATE_WAITING = 1,
  96        OP_VFS_STATE_INPROGR = 2,
  97        OP_VFS_STATE_SERVICED = 4,
  98        OP_VFS_STATE_PURGED = 8,
  99        OP_VFS_STATE_GIVEN_UP = 16,
 100};
 101
 102/*
 103 * An array of client_debug_mask will be built to hold debug keyword/mask
 104 * values fetched from userspace.
 105 */
 106struct client_debug_mask {
 107        char *keyword;
 108        __u64 mask1;
 109        __u64 mask2;
 110};
 111
 112/*
 113 * orangefs kernel memory related flags
 114 */
 115
 116#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB))
 117#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
 118#else
 119#define ORANGEFS_CACHE_CREATE_FLAGS 0
 120#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
 121
 122/* these functions are defined in orangefs-utils.c */
 123int orangefs_prepare_cdm_array(char *debug_array_string);
 124int orangefs_prepare_debugfs_help_string(int);
 125
 126/* defined in orangefs-debugfs.c */
 127int orangefs_client_debug_init(void);
 128
 129void debug_string_to_mask(char *, void *, int);
 130void do_c_mask(int, char *, struct client_debug_mask **);
 131void do_k_mask(int, char *, __u64 **);
 132
 133void debug_mask_to_string(void *, int);
 134void do_k_string(void *, int);
 135void do_c_string(void *, int);
 136int check_amalgam_keyword(void *, int);
 137int keyword_is_amalgam(char *);
 138
 139/*these variables are defined in orangefs-mod.c */
 140extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
 141extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
 142extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
 143extern unsigned int kernel_mask_set_mod_init;
 144
 145extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 146extern const struct xattr_handler *orangefs_xattr_handlers[];
 147
 148extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
 149extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 150
 151/*
 152 * Redefine xtvec structure so that we could move helper functions out of
 153 * the define
 154 */
 155struct xtvec {
 156        __kernel_off_t xtv_off;         /* must be off_t */
 157        __kernel_size_t xtv_len;        /* must be size_t */
 158};
 159
 160/*
 161 * orangefs data structures
 162 */
 163struct orangefs_kernel_op_s {
 164        enum orangefs_vfs_op_states op_state;
 165        __u64 tag;
 166
 167        /*
 168         * Set uses_shared_memory to non zero if this operation uses
 169         * shared memory. If true, then a retry on the op must also
 170         * get a new shared memory buffer and re-populate it.
 171         * Cancels don't care - it only matters for service_operation()
 172         * retry logics and cancels don't go through it anymore. It
 173         * safely stays non-zero when we use it as slot_to_free.
 174         */
 175        union {
 176                int uses_shared_memory;
 177                int slot_to_free;
 178        };
 179
 180        struct orangefs_upcall_s upcall;
 181        struct orangefs_downcall_s downcall;
 182
 183        struct completion waitq;
 184        spinlock_t lock;
 185
 186        int attempts;
 187
 188        struct list_head list;
 189};
 190
 191#define set_op_state_waiting(op)     ((op)->op_state = OP_VFS_STATE_WAITING)
 192#define set_op_state_inprogress(op)  ((op)->op_state = OP_VFS_STATE_INPROGR)
 193#define set_op_state_given_up(op)  ((op)->op_state = OP_VFS_STATE_GIVEN_UP)
 194static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
 195{
 196        op->op_state = OP_VFS_STATE_SERVICED;
 197        complete(&op->waitq);
 198}
 199
 200#define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
 201#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
 202#define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
 203#define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)
 204#define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
 205#define op_is_cancel(op)         ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
 206
 207void op_release(struct orangefs_kernel_op_s *op);
 208
 209extern void orangefs_bufmap_put(int);
 210static inline void put_cancel(struct orangefs_kernel_op_s *op)
 211{
 212        orangefs_bufmap_put(op->slot_to_free);
 213        op_release(op);
 214}
 215
 216static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
 217{
 218        spin_lock(&op->lock);
 219        if (unlikely(op_is_cancel(op))) {
 220                list_del_init(&op->list);
 221                spin_unlock(&op->lock);
 222                put_cancel(op);
 223        } else {
 224                op->op_state |= OP_VFS_STATE_PURGED;
 225                complete(&op->waitq);
 226                spin_unlock(&op->lock);
 227        }
 228}
 229
 230/* per inode private orangefs info */
 231struct orangefs_inode_s {
 232        struct orangefs_object_kref refn;
 233        char link_target[ORANGEFS_NAME_MAX];
 234        __s64 blksize;
 235        /*
 236         * Reading/Writing Extended attributes need to acquire the appropriate
 237         * reader/writer semaphore on the orangefs_inode_s structure.
 238         */
 239        struct rw_semaphore xattr_sem;
 240
 241        struct inode vfs_inode;
 242        sector_t last_failed_block_index_read;
 243
 244        /*
 245         * State of in-memory attributes not yet flushed to disk associated
 246         * with this object
 247         */
 248        unsigned long pinode_flags;
 249
 250        unsigned long getattr_time;
 251};
 252
 253#define P_ATIME_FLAG 0
 254#define P_MTIME_FLAG 1
 255#define P_CTIME_FLAG 2
 256#define P_MODE_FLAG  3
 257
 258#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
 259#define SetAtimeFlag(pinode)   set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
 260#define AtimeFlag(pinode)      test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
 261
 262#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
 263#define SetMtimeFlag(pinode)   set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
 264#define MtimeFlag(pinode)      test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
 265
 266#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
 267#define SetCtimeFlag(pinode)   set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
 268#define CtimeFlag(pinode)      test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
 269
 270#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
 271#define SetModeFlag(pinode)   set_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
 272#define ModeFlag(pinode)      test_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
 273
 274/* per superblock private orangefs info */
 275struct orangefs_sb_info_s {
 276        struct orangefs_khandle root_khandle;
 277        __s32 fs_id;
 278        int id;
 279        int flags;
 280#define ORANGEFS_OPT_INTR       0x01
 281#define ORANGEFS_OPT_LOCAL_LOCK 0x02
 282        char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
 283        struct super_block *sb;
 284        int mount_pending;
 285        struct list_head list;
 286};
 287
 288/*
 289 * structure that holds the state of any async I/O operation issued
 290 * through the VFS. Needed especially to handle cancellation requests
 291 * or even completion notification so that the VFS client-side daemon
 292 * can free up its vfs_request slots.
 293 */
 294struct orangefs_kiocb_s {
 295        /* the pointer to the task that initiated the AIO */
 296        struct task_struct *tsk;
 297
 298        /* pointer to the kiocb that kicked this operation */
 299        struct kiocb *kiocb;
 300
 301        /* buffer index that was used for the I/O */
 302        struct orangefs_bufmap *bufmap;
 303        int buffer_index;
 304
 305        /* orangefs kernel operation type */
 306        struct orangefs_kernel_op_s *op;
 307
 308        /* The user space buffers from/to which I/O is being staged */
 309        struct iovec *iov;
 310
 311        /* number of elements in the iovector */
 312        unsigned long nr_segs;
 313
 314        /* set to indicate the type of the operation */
 315        int rw;
 316
 317        /* file offset */
 318        loff_t offset;
 319
 320        /* and the count in bytes */
 321        size_t bytes_to_be_copied;
 322
 323        ssize_t bytes_copied;
 324        int needs_cleanup;
 325};
 326
 327struct orangefs_stats {
 328        unsigned long cache_hits;
 329        unsigned long cache_misses;
 330        unsigned long reads;
 331        unsigned long writes;
 332};
 333
 334extern struct orangefs_stats g_orangefs_stats;
 335
 336/*
 337 * NOTE: See Documentation/filesystems/porting for information
 338 * on implementing FOO_I and properly accessing fs private data
 339 */
 340static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
 341{
 342        return container_of(inode, struct orangefs_inode_s, vfs_inode);
 343}
 344
 345static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb)
 346{
 347        return (struct orangefs_sb_info_s *) sb->s_fs_info;
 348}
 349
 350/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */
 351static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle)
 352{
 353        union {
 354                unsigned char u[8];
 355                __u64 ino;
 356        } ihandle;
 357
 358        ihandle.u[0] = khandle->u[0] ^ khandle->u[4];
 359        ihandle.u[1] = khandle->u[1] ^ khandle->u[5];
 360        ihandle.u[2] = khandle->u[2] ^ khandle->u[6];
 361        ihandle.u[3] = khandle->u[3] ^ khandle->u[7];
 362        ihandle.u[4] = khandle->u[12] ^ khandle->u[8];
 363        ihandle.u[5] = khandle->u[13] ^ khandle->u[9];
 364        ihandle.u[6] = khandle->u[14] ^ khandle->u[10];
 365        ihandle.u[7] = khandle->u[15] ^ khandle->u[11];
 366
 367        return ihandle.ino;
 368}
 369
 370static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
 371{
 372        return &(ORANGEFS_I(inode)->refn.khandle);
 373}
 374
 375static inline __s32 get_fsid_from_ino(struct inode *inode)
 376{
 377        return ORANGEFS_I(inode)->refn.fs_id;
 378}
 379
 380static inline ino_t get_ino_from_khandle(struct inode *inode)
 381{
 382        struct orangefs_khandle *khandle;
 383        ino_t ino;
 384
 385        khandle = get_khandle_from_ino(inode);
 386        ino = orangefs_khandle_to_ino(khandle);
 387        return ino;
 388}
 389
 390static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
 391{
 392        return get_ino_from_khandle(dentry->d_parent->d_inode);
 393}
 394
 395static inline int is_root_handle(struct inode *inode)
 396{
 397        gossip_debug(GOSSIP_DCACHE_DEBUG,
 398                     "%s: root handle: %pU, this handle: %pU:\n",
 399                     __func__,
 400                     &ORANGEFS_SB(inode->i_sb)->root_khandle,
 401                     get_khandle_from_ino(inode));
 402
 403        if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle),
 404                             get_khandle_from_ino(inode)))
 405                return 0;
 406        else
 407                return 1;
 408}
 409
 410static inline int match_handle(struct orangefs_khandle resp_handle,
 411                               struct inode *inode)
 412{
 413        gossip_debug(GOSSIP_DCACHE_DEBUG,
 414                     "%s: one handle: %pU, another handle:%pU:\n",
 415                     __func__,
 416                     &resp_handle,
 417                     get_khandle_from_ino(inode));
 418
 419        if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode)))
 420                return 0;
 421        else
 422                return 1;
 423}
 424
 425/*
 426 * defined in orangefs-cache.c
 427 */
 428int op_cache_initialize(void);
 429int op_cache_finalize(void);
 430struct orangefs_kernel_op_s *op_alloc(__s32 type);
 431void orangefs_new_tag(struct orangefs_kernel_op_s *op);
 432char *get_opname_string(struct orangefs_kernel_op_s *new_op);
 433
 434int orangefs_inode_cache_initialize(void);
 435int orangefs_inode_cache_finalize(void);
 436
 437/*
 438 * defined in orangefs-mod.c
 439 */
 440void purge_inprogress_ops(void);
 441
 442/*
 443 * defined in waitqueue.c
 444 */
 445void purge_waiting_ops(void);
 446
 447/*
 448 * defined in super.c
 449 */
 450struct dentry *orangefs_mount(struct file_system_type *fst,
 451                           int flags,
 452                           const char *devname,
 453                           void *data);
 454
 455void orangefs_kill_sb(struct super_block *sb);
 456int orangefs_remount(struct orangefs_sb_info_s *);
 457
 458int fsid_key_table_initialize(void);
 459void fsid_key_table_finalize(void);
 460
 461/*
 462 * defined in inode.c
 463 */
 464__u32 convert_to_orangefs_mask(unsigned long lite_mask);
 465struct inode *orangefs_new_inode(struct super_block *sb,
 466                              struct inode *dir,
 467                              int mode,
 468                              dev_t dev,
 469                              struct orangefs_object_kref *ref);
 470
 471int orangefs_setattr(struct dentry *dentry, struct iattr *iattr);
 472
 473int orangefs_getattr(struct vfsmount *mnt,
 474                  struct dentry *dentry,
 475                  struct kstat *kstat);
 476
 477int orangefs_permission(struct inode *inode, int mask);
 478
 479/*
 480 * defined in xattr.c
 481 */
 482int orangefs_setxattr(struct dentry *dentry,
 483                   const char *name,
 484                   const void *value,
 485                   size_t size,
 486                   int flags);
 487
 488ssize_t orangefs_getxattr(struct dentry *dentry,
 489                       const char *name,
 490                       void *buffer,
 491                       size_t size);
 492
 493ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 494
 495/*
 496 * defined in namei.c
 497 */
 498struct inode *orangefs_iget(struct super_block *sb,
 499                         struct orangefs_object_kref *ref);
 500
 501ssize_t orangefs_inode_read(struct inode *inode,
 502                            struct iov_iter *iter,
 503                            loff_t *offset,
 504                            loff_t readahead_size);
 505
 506/*
 507 * defined in devorangefs-req.c
 508 */
 509int orangefs_dev_init(void);
 510void orangefs_dev_cleanup(void);
 511int is_daemon_in_service(void);
 512bool __is_daemon_in_service(void);
 513
 514/*
 515 * defined in orangefs-utils.c
 516 */
 517__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
 518
 519int orangefs_flush_inode(struct inode *inode);
 520
 521ssize_t orangefs_inode_getxattr(struct inode *inode,
 522                             const char *name,
 523                             void *buffer,
 524                             size_t size);
 525
 526int orangefs_inode_setxattr(struct inode *inode,
 527                         const char *name,
 528                         const void *value,
 529                         size_t size,
 530                         int flags);
 531
 532int orangefs_inode_getattr(struct inode *inode, int new, int bypass);
 533
 534int orangefs_inode_check_changed(struct inode *inode);
 535
 536int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
 537
 538void orangefs_make_bad_inode(struct inode *inode);
 539
 540int orangefs_unmount_sb(struct super_block *sb);
 541
 542bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
 543
 544int orangefs_normalize_to_errno(__s32 error_code);
 545
 546extern struct mutex devreq_mutex;
 547extern struct mutex request_mutex;
 548extern int debug;
 549extern int op_timeout_secs;
 550extern int slot_timeout_secs;
 551extern int dcache_timeout_msecs;
 552extern int getattr_timeout_msecs;
 553extern struct list_head orangefs_superblocks;
 554extern spinlock_t orangefs_superblocks_lock;
 555extern struct list_head orangefs_request_list;
 556extern spinlock_t orangefs_request_list_lock;
 557extern wait_queue_head_t orangefs_request_list_waitq;
 558extern struct list_head *htable_ops_in_progress;
 559extern spinlock_t htable_ops_in_progress_lock;
 560extern int hash_table_size;
 561
 562extern const struct address_space_operations orangefs_address_operations;
 563extern struct backing_dev_info orangefs_backing_dev_info;
 564extern const struct inode_operations orangefs_file_inode_operations;
 565extern const struct file_operations orangefs_file_operations;
 566extern const struct inode_operations orangefs_symlink_inode_operations;
 567extern const struct inode_operations orangefs_dir_inode_operations;
 568extern const struct file_operations orangefs_dir_operations;
 569extern const struct dentry_operations orangefs_dentry_operations;
 570extern const struct file_operations orangefs_devreq_file_operations;
 571
 572extern wait_queue_head_t orangefs_bufmap_init_waitq;
 573
 574/*
 575 * misc convenience macros
 576 */
 577
 578#define ORANGEFS_OP_INTERRUPTIBLE 1   /* service_operation() is interruptible */
 579#define ORANGEFS_OP_PRIORITY      2   /* service_operation() is high priority */
 580#define ORANGEFS_OP_CANCELLATION  4   /* this is a cancellation */
 581#define ORANGEFS_OP_NO_MUTEX      8   /* don't acquire request_mutex */
 582#define ORANGEFS_OP_ASYNC         16  /* Queue it, but don't wait */
 583
 584int service_operation(struct orangefs_kernel_op_s *op,
 585                      const char *op_name,
 586                      int flags);
 587
 588#define get_interruptible_flag(inode) \
 589        ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? \
 590                ORANGEFS_OP_INTERRUPTIBLE : 0)
 591
 592#define fill_default_sys_attrs(sys_attr, type, mode)                    \
 593do {                                                                    \
 594        sys_attr.owner = from_kuid(&init_user_ns, current_fsuid()); \
 595        sys_attr.group = from_kgid(&init_user_ns, current_fsgid()); \
 596        sys_attr.perms = ORANGEFS_util_translate_mode(mode);            \
 597        sys_attr.mtime = 0;                                             \
 598        sys_attr.atime = 0;                                             \
 599        sys_attr.ctime = 0;                                             \
 600        sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE;                  \
 601} while (0)
 602
 603static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
 604{
 605#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
 606        inode_lock(inode);
 607#endif
 608        i_size_write(inode, i_size);
 609#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
 610        inode_unlock(inode);
 611#endif
 612}
 613
 614#endif /* __ORANGEFSKERNEL_H */
 615