linux/fs/orangefs/orangefs-kernel.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * (C) 2001 Clemson University and The University of Chicago
   4 *
   5 * See COPYING in top-level directory.
   6 */
   7
   8/*
   9 *  The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and
  10 *  accessed through the Linux VFS (i.e. using standard I/O system calls).
  11 *  This support is only needed on clients that wish to mount the file system.
  12 *
  13 */
  14
  15/*
  16 *  Declarations and macros for the ORANGEFS Linux kernel support.
  17 */
  18
  19#ifndef __ORANGEFSKERNEL_H
  20#define __ORANGEFSKERNEL_H
  21
  22#include <linux/kernel.h>
  23#include <linux/moduleparam.h>
  24#include <linux/statfs.h>
  25#include <linux/backing-dev.h>
  26#include <linux/device.h>
  27#include <linux/mpage.h>
  28#include <linux/namei.h>
  29#include <linux/errno.h>
  30#include <linux/init.h>
  31#include <linux/module.h>
  32#include <linux/slab.h>
  33#include <linux/types.h>
  34#include <linux/fs.h>
  35#include <linux/vmalloc.h>
  36
  37#include <linux/aio.h>
  38#include <linux/posix_acl.h>
  39#include <linux/posix_acl_xattr.h>
  40#include <linux/compat.h>
  41#include <linux/mount.h>
  42#include <linux/uaccess.h>
  43#include <linux/atomic.h>
  44#include <linux/uio.h>
  45#include <linux/sched/signal.h>
  46#include <linux/mm.h>
  47#include <linux/wait.h>
  48#include <linux/dcache.h>
  49#include <linux/pagemap.h>
  50#include <linux/poll.h>
  51#include <linux/rwsem.h>
  52#include <linux/xattr.h>
  53#include <linux/exportfs.h>
  54#include <linux/hashtable.h>
  55
  56#include <asm/unaligned.h>
  57
  58#include "orangefs-dev-proto.h"
  59
  60#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS       20
  61
  62#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS   30
  63
  64#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS     900      /* 15 minutes */
  65
  66#define ORANGEFS_REQDEVICE_NAME          "pvfs2-req"
  67
  68#define ORANGEFS_DEVREQ_MAGIC             0x20030529
  69#define ORANGEFS_PURGE_RETRY_COUNT     0x00000005
  70
  71#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) +   \
  72sizeof(__u64) + sizeof(struct orangefs_upcall_s))
  73#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \
  74sizeof(__u64) + sizeof(struct orangefs_downcall_s))
  75
  76/*
  77 * valid orangefs kernel operation states
  78 *
  79 * unknown  - op was just initialized
  80 * waiting  - op is on request_list (upward bound)
  81 * inprogr  - op is in progress (waiting for downcall)
  82 * serviced - op has matching downcall; ok
  83 * purged   - op has to start a timer since client-core
  84 *            exited uncleanly before servicing op
  85 * given up - submitter has given up waiting for it
  86 */
  87enum orangefs_vfs_op_states {
  88        OP_VFS_STATE_UNKNOWN = 0,
  89        OP_VFS_STATE_WAITING = 1,
  90        OP_VFS_STATE_INPROGR = 2,
  91        OP_VFS_STATE_SERVICED = 4,
  92        OP_VFS_STATE_PURGED = 8,
  93        OP_VFS_STATE_GIVEN_UP = 16,
  94};
  95
  96/*
  97 * orangefs kernel memory related flags
  98 */
  99
 100#if (defined CONFIG_DEBUG_SLAB)
 101#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
 102#else
 103#define ORANGEFS_CACHE_CREATE_FLAGS 0
 104#endif
 105
 106extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 107extern const struct xattr_handler *orangefs_xattr_handlers[];
 108
 109extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu);
 110extern int orangefs_set_acl(struct user_namespace *mnt_userns,
 111                            struct inode *inode, struct posix_acl *acl,
 112                            int type);
 113
 114/*
 115 * orangefs data structures
 116 */
 117struct orangefs_kernel_op_s {
 118        enum orangefs_vfs_op_states op_state;
 119        __u64 tag;
 120
 121        /*
 122         * Set uses_shared_memory to non zero if this operation uses
 123         * shared memory. If true, then a retry on the op must also
 124         * get a new shared memory buffer and re-populate it.
 125         * Cancels don't care - it only matters for service_operation()
 126         * retry logics and cancels don't go through it anymore. It
 127         * safely stays non-zero when we use it as slot_to_free.
 128         */
 129        union {
 130                int uses_shared_memory;
 131                int slot_to_free;
 132        };
 133
 134        struct orangefs_upcall_s upcall;
 135        struct orangefs_downcall_s downcall;
 136
 137        struct completion waitq;
 138        spinlock_t lock;
 139
 140        int attempts;
 141
 142        struct list_head list;
 143};
 144
 145#define set_op_state_waiting(op)     ((op)->op_state = OP_VFS_STATE_WAITING)
 146#define set_op_state_inprogress(op)  ((op)->op_state = OP_VFS_STATE_INPROGR)
 147#define set_op_state_given_up(op)  ((op)->op_state = OP_VFS_STATE_GIVEN_UP)
 148static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
 149{
 150        op->op_state = OP_VFS_STATE_SERVICED;
 151        complete(&op->waitq);
 152}
 153
 154#define op_state_waiting(op)     ((op)->op_state & OP_VFS_STATE_WAITING)
 155#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
 156#define op_state_serviced(op)    ((op)->op_state & OP_VFS_STATE_SERVICED)
 157#define op_state_purged(op)      ((op)->op_state & OP_VFS_STATE_PURGED)
 158#define op_state_given_up(op)    ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
 159#define op_is_cancel(op)         ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
 160
 161void op_release(struct orangefs_kernel_op_s *op);
 162
 163extern void orangefs_bufmap_put(int);
 164static inline void put_cancel(struct orangefs_kernel_op_s *op)
 165{
 166        orangefs_bufmap_put(op->slot_to_free);
 167        op_release(op);
 168}
 169
 170static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
 171{
 172        spin_lock(&op->lock);
 173        if (unlikely(op_is_cancel(op))) {
 174                list_del_init(&op->list);
 175                spin_unlock(&op->lock);
 176                put_cancel(op);
 177        } else {
 178                op->op_state |= OP_VFS_STATE_PURGED;
 179                complete(&op->waitq);
 180                spin_unlock(&op->lock);
 181        }
 182}
 183
 184/* per inode private orangefs info */
 185struct orangefs_inode_s {
 186        struct orangefs_object_kref refn;
 187        char link_target[ORANGEFS_NAME_MAX];
 188        /*
 189         * Reading/Writing Extended attributes need to acquire the appropriate
 190         * reader/writer semaphore on the orangefs_inode_s structure.
 191         */
 192        struct rw_semaphore xattr_sem;
 193
 194        struct inode vfs_inode;
 195        sector_t last_failed_block_index_read;
 196
 197        unsigned long getattr_time;
 198        unsigned long mapping_time;
 199        int attr_valid;
 200        kuid_t attr_uid;
 201        kgid_t attr_gid;
 202        unsigned long bitlock;
 203
 204        DECLARE_HASHTABLE(xattr_cache, 4);
 205};
 206
 207/* per superblock private orangefs info */
 208struct orangefs_sb_info_s {
 209        struct orangefs_khandle root_khandle;
 210        __s32 fs_id;
 211        int id;
 212        int flags;
 213#define ORANGEFS_OPT_INTR       0x01
 214#define ORANGEFS_OPT_LOCAL_LOCK 0x02
 215        char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
 216        struct super_block *sb;
 217        int mount_pending;
 218        int no_list;
 219        struct list_head list;
 220};
 221
 222struct orangefs_stats {
 223        unsigned long cache_hits;
 224        unsigned long cache_misses;
 225        unsigned long reads;
 226        unsigned long writes;
 227};
 228
 229struct orangefs_cached_xattr {
 230        struct hlist_node node;
 231        char key[ORANGEFS_MAX_XATTR_NAMELEN];
 232        char val[ORANGEFS_MAX_XATTR_VALUELEN];
 233        ssize_t length;
 234        unsigned long timeout;
 235};
 236
 237struct orangefs_write_range {
 238        loff_t pos;
 239        size_t len;
 240        kuid_t uid;
 241        kgid_t gid;
 242};
 243
 244extern struct orangefs_stats orangefs_stats;
 245
 246/*
 247 * NOTE: See Documentation/filesystems/porting.rst for information
 248 * on implementing FOO_I and properly accessing fs private data
 249 */
 250static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
 251{
 252        return container_of(inode, struct orangefs_inode_s, vfs_inode);
 253}
 254
 255static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb)
 256{
 257        return (struct orangefs_sb_info_s *) sb->s_fs_info;
 258}
 259
 260/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */
 261static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle)
 262{
 263        union {
 264                unsigned char u[8];
 265                __u64 ino;
 266        } ihandle;
 267
 268        ihandle.u[0] = khandle->u[0] ^ khandle->u[4];
 269        ihandle.u[1] = khandle->u[1] ^ khandle->u[5];
 270        ihandle.u[2] = khandle->u[2] ^ khandle->u[6];
 271        ihandle.u[3] = khandle->u[3] ^ khandle->u[7];
 272        ihandle.u[4] = khandle->u[12] ^ khandle->u[8];
 273        ihandle.u[5] = khandle->u[13] ^ khandle->u[9];
 274        ihandle.u[6] = khandle->u[14] ^ khandle->u[10];
 275        ihandle.u[7] = khandle->u[15] ^ khandle->u[11];
 276
 277        return ihandle.ino;
 278}
 279
 280static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
 281{
 282        return &(ORANGEFS_I(inode)->refn.khandle);
 283}
 284
 285static inline int is_root_handle(struct inode *inode)
 286{
 287        gossip_debug(GOSSIP_DCACHE_DEBUG,
 288                     "%s: root handle: %pU, this handle: %pU:\n",
 289                     __func__,
 290                     &ORANGEFS_SB(inode->i_sb)->root_khandle,
 291                     get_khandle_from_ino(inode));
 292
 293        if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle),
 294                             get_khandle_from_ino(inode)))
 295                return 0;
 296        else
 297                return 1;
 298}
 299
 300static inline int match_handle(struct orangefs_khandle resp_handle,
 301                               struct inode *inode)
 302{
 303        gossip_debug(GOSSIP_DCACHE_DEBUG,
 304                     "%s: one handle: %pU, another handle:%pU:\n",
 305                     __func__,
 306                     &resp_handle,
 307                     get_khandle_from_ino(inode));
 308
 309        if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode)))
 310                return 0;
 311        else
 312                return 1;
 313}
 314
 315/*
 316 * defined in orangefs-cache.c
 317 */
 318int op_cache_initialize(void);
 319int op_cache_finalize(void);
 320struct orangefs_kernel_op_s *op_alloc(__s32 type);
 321void orangefs_new_tag(struct orangefs_kernel_op_s *op);
 322char *get_opname_string(struct orangefs_kernel_op_s *new_op);
 323
 324int orangefs_inode_cache_initialize(void);
 325int orangefs_inode_cache_finalize(void);
 326
 327/*
 328 * defined in orangefs-mod.c
 329 */
 330void purge_inprogress_ops(void);
 331
 332/*
 333 * defined in waitqueue.c
 334 */
 335void purge_waiting_ops(void);
 336
 337/*
 338 * defined in super.c
 339 */
 340extern uint64_t orangefs_features;
 341
 342struct dentry *orangefs_mount(struct file_system_type *fst,
 343                           int flags,
 344                           const char *devname,
 345                           void *data);
 346
 347void orangefs_kill_sb(struct super_block *sb);
 348int orangefs_remount(struct orangefs_sb_info_s *);
 349
 350int fsid_key_table_initialize(void);
 351void fsid_key_table_finalize(void);
 352
 353/*
 354 * defined in inode.c
 355 */
 356vm_fault_t orangefs_page_mkwrite(struct vm_fault *);
 357struct inode *orangefs_new_inode(struct super_block *sb,
 358                              struct inode *dir,
 359                              int mode,
 360                              dev_t dev,
 361                              struct orangefs_object_kref *ref);
 362
 363int __orangefs_setattr(struct inode *, struct iattr *);
 364int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 365
 366int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 367                     struct kstat *stat, u32 request_mask, unsigned int flags);
 368
 369int orangefs_permission(struct user_namespace *mnt_userns,
 370                        struct inode *inode, int mask);
 371
 372int orangefs_update_time(struct inode *, struct timespec64 *, int);
 373
 374/*
 375 * defined in xattr.c
 376 */
 377ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 378
 379/*
 380 * defined in namei.c
 381 */
 382struct inode *orangefs_iget(struct super_block *sb,
 383                         struct orangefs_object_kref *ref);
 384
 385/*
 386 * defined in devorangefs-req.c
 387 */
 388extern uint32_t orangefs_userspace_version;
 389
 390int orangefs_dev_init(void);
 391void orangefs_dev_cleanup(void);
 392int is_daemon_in_service(void);
 393bool __is_daemon_in_service(void);
 394
 395/*
 396 * defined in file.c
 397 */
 398int orangefs_revalidate_mapping(struct inode *);
 399ssize_t wait_for_direct_io(enum ORANGEFS_io_type, struct inode *, loff_t *,
 400    struct iov_iter *, size_t, loff_t, struct orangefs_write_range *, int *,
 401    struct file *);
 402ssize_t do_readv_writev(enum ORANGEFS_io_type, struct file *, loff_t *,
 403    struct iov_iter *);
 404
 405/*
 406 * defined in orangefs-utils.c
 407 */
 408__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
 409
 410ssize_t orangefs_inode_getxattr(struct inode *inode,
 411                             const char *name,
 412                             void *buffer,
 413                             size_t size);
 414
 415int orangefs_inode_setxattr(struct inode *inode,
 416                         const char *name,
 417                         const void *value,
 418                         size_t size,
 419                         int flags);
 420
 421#define ORANGEFS_GETATTR_NEW 1
 422#define ORANGEFS_GETATTR_SIZE 2
 423
 424int orangefs_inode_getattr(struct inode *, int);
 425
 426int orangefs_inode_check_changed(struct inode *inode);
 427
 428int orangefs_inode_setattr(struct inode *inode);
 429
 430bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
 431
 432int orangefs_normalize_to_errno(__s32 error_code);
 433
 434extern struct mutex orangefs_request_mutex;
 435extern int op_timeout_secs;
 436extern int slot_timeout_secs;
 437extern int orangefs_cache_timeout_msecs;
 438extern int orangefs_dcache_timeout_msecs;
 439extern int orangefs_getattr_timeout_msecs;
 440extern struct list_head orangefs_superblocks;
 441extern spinlock_t orangefs_superblocks_lock;
 442extern struct list_head orangefs_request_list;
 443extern spinlock_t orangefs_request_list_lock;
 444extern wait_queue_head_t orangefs_request_list_waitq;
 445extern struct list_head *orangefs_htable_ops_in_progress;
 446extern spinlock_t orangefs_htable_ops_in_progress_lock;
 447extern int hash_table_size;
 448
 449extern const struct file_operations orangefs_file_operations;
 450extern const struct inode_operations orangefs_symlink_inode_operations;
 451extern const struct inode_operations orangefs_dir_inode_operations;
 452extern const struct file_operations orangefs_dir_operations;
 453extern const struct dentry_operations orangefs_dentry_operations;
 454
 455/*
 456 * misc convenience macros
 457 */
 458
 459#define ORANGEFS_OP_INTERRUPTIBLE 1   /* service_operation() is interruptible */
 460#define ORANGEFS_OP_PRIORITY      2   /* service_operation() is high priority */
 461#define ORANGEFS_OP_CANCELLATION  4   /* this is a cancellation */
 462#define ORANGEFS_OP_NO_MUTEX      8   /* don't acquire request_mutex */
 463#define ORANGEFS_OP_ASYNC         16  /* Queue it, but don't wait */
 464#define ORANGEFS_OP_WRITEBACK     32
 465
 466int service_operation(struct orangefs_kernel_op_s *op,
 467                      const char *op_name,
 468                      int flags);
 469
 470#define get_interruptible_flag(inode) \
 471        ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? \
 472                ORANGEFS_OP_INTERRUPTIBLE : 0)
 473
 474#define fill_default_sys_attrs(sys_attr, type, mode)                    \
 475do {                                                                    \
 476        sys_attr.owner = from_kuid(&init_user_ns, current_fsuid()); \
 477        sys_attr.group = from_kgid(&init_user_ns, current_fsgid()); \
 478        sys_attr.perms = ORANGEFS_util_translate_mode(mode);            \
 479        sys_attr.mtime = 0;                                             \
 480        sys_attr.atime = 0;                                             \
 481        sys_attr.ctime = 0;                                             \
 482        sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE;                  \
 483} while (0)
 484
 485static inline void orangefs_set_timeout(struct dentry *dentry)
 486{
 487        unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
 488
 489        dentry->d_fsdata = (void *) time;
 490}
 491
 492#endif /* __ORANGEFSKERNEL_H */
 493