linux/drivers/staging/lustre/lustre/include/lustre_disk.h
<<
>>
Prefs
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/include/lustre_disk.h
 *
 * Lustre disk format definitions.
 *
 * Author: Nathan Rutman <nathan@clusterfs.com>
 */

  43#ifndef _LUSTRE_DISK_H
  44#define _LUSTRE_DISK_H
  45
/** \defgroup disk disk
 *
 * @{
 */

#include <linux/backing-dev.h>
#include <linux/types.h>

#include "../../include/linux/libcfs/libcfs.h"
#include "../../include/linux/lnet/types.h"

  55/****************** persistent mount data *********************/
  56
  57#define LDD_F_SV_TYPE_MDT   0x0001
  58#define LDD_F_SV_TYPE_OST   0x0002
  59#define LDD_F_SV_TYPE_MGS   0x0004
  60#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT  | \
  61                            LDD_F_SV_TYPE_OST  | \
  62                            LDD_F_SV_TYPE_MGS)
  63#define LDD_F_SV_ALL    0x0008
  64
  65/****************** mount command *********************/
  66
  67/* The lmd is only used internally by Lustre; mount simply passes
  68   everything as string options */
  69
  70#define LMD_MAGIC    0xbdacbd03
  71
  72/* gleaned from the mount command - no persistent info here */
  73struct lustre_mount_data {
  74        __u32      lmd_magic;
  75        __u32      lmd_flags;    /* lustre mount flags */
  76        int     lmd_mgs_failnodes; /* mgs failover node count */
  77        int     lmd_exclude_count;
  78        int     lmd_recovery_time_soft;
  79        int     lmd_recovery_time_hard;
  80        char      *lmd_dev;        /* device name */
  81        char      *lmd_profile;       /* client only */
  82        char      *lmd_mgssec;  /* sptlrpc flavor to mgs */
  83        char      *lmd_opts;      /* lustre mount options (as opposed to
  84                                         _device_ mount options) */
  85        char      *lmd_params;  /* lustre params */
  86        __u32     *lmd_exclude;       /* array of OSTs to ignore */
  87        char    *lmd_mgs;          /* MGS nid */
  88        char    *lmd_osd_type;      /* OSD type */
  89};
  90
  91#define LMD_FLG_SERVER          0x0001  /* Mounting a server */
  92#define LMD_FLG_CLIENT          0x0002  /* Mounting a client */
  93#define LMD_FLG_ABORT_RECOV     0x0008  /* Abort recovery */
  94#define LMD_FLG_NOSVC           0x0010  /* Only start MGS/MGC for servers,
  95                                           no other services */
  96#define LMD_FLG_NOMGS           0x0020  /* Only start target for servers, reusing
  97                                           existing MGS services */
  98#define LMD_FLG_WRITECONF       0x0040  /* Rewrite config log */
  99#define LMD_FLG_NOIR            0x0080  /* NO imperative recovery */
 100#define LMD_FLG_NOSCRUB         0x0100  /* Do not trigger scrub automatically */
 101#define LMD_FLG_MGS             0x0200  /* Also start MGS along with server */
 102#define LMD_FLG_IAM             0x0400  /* IAM dir */
 103#define LMD_FLG_NO_PRIMNODE     0x0800  /* all nodes are service nodes */
 104#define LMD_FLG_VIRGIN          0x1000  /* the service registers first time */
 105#define LMD_FLG_UPDATE          0x2000  /* update parameters */
 106#define LMD_FLG_HSM             0x4000  /* Start coordinator */
 107
 108#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
 109
 110/****************** last_rcvd file *********************/
 111
 112/** version recovery epoch */
 113#define LR_EPOCH_BITS   32
 114#define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
 115#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
 116#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */
 117
 118#define LR_SERVER_SIZE   512
 119#define LR_CLIENT_START 8192
 120#define LR_CLIENT_SIZE   128
 121#if LR_CLIENT_START < LR_SERVER_SIZE
 122#error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
 123#endif
 124
 125/*
 126 * This limit is arbitrary (131072 clients on x86), but it is convenient to use
 127 * 2^n * PAGE_CACHE_SIZE * 8 for the number of bits that fit an order-n allocation.
 128 * If we need more than 131072 clients (order-2 allocation on x86) then this
 129 * should become an array of single-page pointers that are allocated on demand.
 130 */
 131#if (128 * 1024UL) > (PAGE_CACHE_SIZE * 8)
 132#define LR_MAX_CLIENTS (128 * 1024UL)
 133#else
 134#define LR_MAX_CLIENTS (PAGE_CACHE_SIZE * 8)
 135#endif
 136
 137/** COMPAT_146: this is an OST (temporary) */
 138#define OBD_COMPAT_OST    0x00000002
 139/** COMPAT_146: this is an MDT (temporary) */
 140#define OBD_COMPAT_MDT    0x00000004
 141/** 2.0 server, interop flag to show server version is changed */
 142#define OBD_COMPAT_20      0x00000008
 143
 144/** MDS handles LOV_OBJID file */
 145#define OBD_ROCOMPAT_LOVOBJID   0x00000001
 146
 147/** OST handles group subdirs */
 148#define OBD_INCOMPAT_GROUPS     0x00000001
 149/** this is an OST */
 150#define OBD_INCOMPAT_OST        0x00000002
 151/** this is an MDT */
 152#define OBD_INCOMPAT_MDT        0x00000004
 153/** common last_rvcd format */
 154#define OBD_INCOMPAT_COMMON_LR  0x00000008
 155/** FID is enabled */
 156#define OBD_INCOMPAT_FID        0x00000010
 157/** Size-on-MDS is enabled */
 158#define OBD_INCOMPAT_SOM        0x00000020
 159/** filesystem using iam format to store directory entries */
 160#define OBD_INCOMPAT_IAM_DIR    0x00000040
 161/** LMA attribute contains per-inode incompatible flags */
 162#define OBD_INCOMPAT_LMA        0x00000080
 163/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16
 164 * bits are now used to store a generation. Once we start changing the layout
 165 * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count
 166 * will be confused by interpreting stripe_count | gen << 16 as the actual
 167 * stripe count */
 168#define OBD_INCOMPAT_LMM_VER    0x00000100
 169/** multiple OI files for MDT */
 170#define OBD_INCOMPAT_MULTI_OI   0x00000200
 171
 172/* Data stored per server at the head of the last_rcvd file.  In le32 order.
 173   This should be common to filter_internal.h, lustre_mds.h */
 174struct lr_server_data {
 175        __u8  lsd_uuid[40];     /* server UUID */
 176        __u64 lsd_last_transno;    /* last completed transaction ID */
 177        __u64 lsd_compat14;     /* reserved - compat with old last_rcvd */
 178        __u64 lsd_mount_count;     /* incarnation number */
 179        __u32 lsd_feature_compat;  /* compatible feature flags */
 180        __u32 lsd_feature_rocompat;/* read-only compatible feature flags */
 181        __u32 lsd_feature_incompat;/* incompatible feature flags */
 182        __u32 lsd_server_size;     /* size of server data area */
 183        __u32 lsd_client_start;    /* start of per-client data area */
 184        __u16 lsd_client_size;     /* size of per-client data area */
 185        __u16 lsd_subdir_count;    /* number of subdirectories for objects */
 186        __u64 lsd_catalog_oid;     /* recovery catalog object id */
 187        __u32 lsd_catalog_ogen;    /* recovery catalog inode generation */
 188        __u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
 189        __u32 lsd_osd_index;       /* index number of OST in LOV */
 190        __u32 lsd_padding1;     /* was lsd_mdt_index, unused in 2.4.0 */
 191        __u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
 192        /** transaction values since lsd_trans_table_time */
 193        __u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
 194        /** start point of transno table below */
 195        __u32 lsd_trans_table_time; /* time of first slot in table above */
 196        __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
 197        __u8  lsd_padding[LR_SERVER_SIZE - 288];
 198};
 199
 200/* Data stored per client in the last_rcvd file.  In le32 order. */
 201struct lsd_client_data {
 202        __u8  lcd_uuid[40];      /* client UUID */
 203        __u64 lcd_last_transno; /* last completed transaction ID */
 204        __u64 lcd_last_xid;     /* xid for the last transaction */
 205        __u32 lcd_last_result;  /* result from last RPC */
 206        __u32 lcd_last_data;    /* per-op data (disposition for open &c.) */
 207        /* for MDS_CLOSE requests */
 208        __u64 lcd_last_close_transno; /* last completed transaction ID */
 209        __u64 lcd_last_close_xid;     /* xid for the last transaction */
 210        __u32 lcd_last_close_result;  /* result from last RPC */
 211        __u32 lcd_last_close_data;    /* per-op data */
 212        /* VBR: last versions */
 213        __u64 lcd_pre_versions[4];
 214        __u32 lcd_last_epoch;
 215        /** orphans handling for delayed export rely on that */
 216        __u32 lcd_first_epoch;
 217        __u8  lcd_padding[LR_CLIENT_SIZE - 128];
 218};
 219
 220/* bug20354: the lcd_uuid for export of clients may be wrong */
 221static inline void check_lcd(char *obd_name, int index,
 222                             struct lsd_client_data *lcd)
 223{
 224        int length = sizeof(lcd->lcd_uuid);
 225
 226        if (strnlen((char *)lcd->lcd_uuid, length) == length) {
 227                lcd->lcd_uuid[length - 1] = '\0';
 228
 229                LCONSOLE_ERROR("the client UUID (%s) on %s for exports stored in last_rcvd(index = %d) is bad!\n",
 230                               lcd->lcd_uuid, obd_name, index);
 231        }
 232}
 233
 234/* last_rcvd handling */
 235static inline void lsd_le_to_cpu(struct lr_server_data *buf,
 236                                 struct lr_server_data *lsd)
 237{
 238        int i;
 239
 240        memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
 241        lsd->lsd_last_transno     = le64_to_cpu(buf->lsd_last_transno);
 242        lsd->lsd_compat14        = le64_to_cpu(buf->lsd_compat14);
 243        lsd->lsd_mount_count      = le64_to_cpu(buf->lsd_mount_count);
 244        lsd->lsd_feature_compat   = le32_to_cpu(buf->lsd_feature_compat);
 245        lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
 246        lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
 247        lsd->lsd_server_size      = le32_to_cpu(buf->lsd_server_size);
 248        lsd->lsd_client_start     = le32_to_cpu(buf->lsd_client_start);
 249        lsd->lsd_client_size      = le16_to_cpu(buf->lsd_client_size);
 250        lsd->lsd_subdir_count     = le16_to_cpu(buf->lsd_subdir_count);
 251        lsd->lsd_catalog_oid      = le64_to_cpu(buf->lsd_catalog_oid);
 252        lsd->lsd_catalog_ogen     = le32_to_cpu(buf->lsd_catalog_ogen);
 253        memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid));
 254        lsd->lsd_osd_index      = le32_to_cpu(buf->lsd_osd_index);
 255        lsd->lsd_padding1       = le32_to_cpu(buf->lsd_padding1);
 256        lsd->lsd_start_epoch      = le32_to_cpu(buf->lsd_start_epoch);
 257        for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
 258                lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]);
 259        lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
 260        lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
 261}
 262
 263static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
 264                                 struct lr_server_data *buf)
 265{
 266        int i;
 267
 268        memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
 269        buf->lsd_last_transno     = cpu_to_le64(lsd->lsd_last_transno);
 270        buf->lsd_compat14        = cpu_to_le64(lsd->lsd_compat14);
 271        buf->lsd_mount_count      = cpu_to_le64(lsd->lsd_mount_count);
 272        buf->lsd_feature_compat   = cpu_to_le32(lsd->lsd_feature_compat);
 273        buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
 274        buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
 275        buf->lsd_server_size      = cpu_to_le32(lsd->lsd_server_size);
 276        buf->lsd_client_start     = cpu_to_le32(lsd->lsd_client_start);
 277        buf->lsd_client_size      = cpu_to_le16(lsd->lsd_client_size);
 278        buf->lsd_subdir_count     = cpu_to_le16(lsd->lsd_subdir_count);
 279        buf->lsd_catalog_oid      = cpu_to_le64(lsd->lsd_catalog_oid);
 280        buf->lsd_catalog_ogen     = cpu_to_le32(lsd->lsd_catalog_ogen);
 281        memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid));
 282        buf->lsd_osd_index        = cpu_to_le32(lsd->lsd_osd_index);
 283        buf->lsd_padding1         = cpu_to_le32(lsd->lsd_padding1);
 284        buf->lsd_start_epoch      = cpu_to_le32(lsd->lsd_start_epoch);
 285        for (i = 0; i < LR_EXPIRE_INTERVALS; i++)
 286                buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]);
 287        buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
 288        buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
 289}
 290
 291static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
 292                                 struct lsd_client_data *lcd)
 293{
 294        memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid));
 295        lcd->lcd_last_transno       = le64_to_cpu(buf->lcd_last_transno);
 296        lcd->lcd_last_xid          = le64_to_cpu(buf->lcd_last_xid);
 297        lcd->lcd_last_result    = le32_to_cpu(buf->lcd_last_result);
 298        lcd->lcd_last_data        = le32_to_cpu(buf->lcd_last_data);
 299        lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
 300        lcd->lcd_last_close_xid     = le64_to_cpu(buf->lcd_last_close_xid);
 301        lcd->lcd_last_close_result  = le32_to_cpu(buf->lcd_last_close_result);
 302        lcd->lcd_last_close_data    = le32_to_cpu(buf->lcd_last_close_data);
 303        lcd->lcd_pre_versions[0]    = le64_to_cpu(buf->lcd_pre_versions[0]);
 304        lcd->lcd_pre_versions[1]    = le64_to_cpu(buf->lcd_pre_versions[1]);
 305        lcd->lcd_pre_versions[2]    = le64_to_cpu(buf->lcd_pre_versions[2]);
 306        lcd->lcd_pre_versions[3]    = le64_to_cpu(buf->lcd_pre_versions[3]);
 307        lcd->lcd_last_epoch      = le32_to_cpu(buf->lcd_last_epoch);
 308        lcd->lcd_first_epoch    = le32_to_cpu(buf->lcd_first_epoch);
 309}
 310
 311static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
 312                                 struct lsd_client_data *buf)
 313{
 314        memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid));
 315        buf->lcd_last_transno       = cpu_to_le64(lcd->lcd_last_transno);
 316        buf->lcd_last_xid          = cpu_to_le64(lcd->lcd_last_xid);
 317        buf->lcd_last_result    = cpu_to_le32(lcd->lcd_last_result);
 318        buf->lcd_last_data        = cpu_to_le32(lcd->lcd_last_data);
 319        buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
 320        buf->lcd_last_close_xid     = cpu_to_le64(lcd->lcd_last_close_xid);
 321        buf->lcd_last_close_result  = cpu_to_le32(lcd->lcd_last_close_result);
 322        buf->lcd_last_close_data    = cpu_to_le32(lcd->lcd_last_close_data);
 323        buf->lcd_pre_versions[0]    = cpu_to_le64(lcd->lcd_pre_versions[0]);
 324        buf->lcd_pre_versions[1]    = cpu_to_le64(lcd->lcd_pre_versions[1]);
 325        buf->lcd_pre_versions[2]    = cpu_to_le64(lcd->lcd_pre_versions[2]);
 326        buf->lcd_pre_versions[3]    = cpu_to_le64(lcd->lcd_pre_versions[3]);
 327        buf->lcd_last_epoch      = cpu_to_le32(lcd->lcd_last_epoch);
 328        buf->lcd_first_epoch    = cpu_to_le32(lcd->lcd_first_epoch);
 329}
 330
 331static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
 332{
 333        return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ?
 334                lcd->lcd_last_transno : lcd->lcd_last_close_transno);
 335}
 336
 337static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
 338{
 339        return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ?
 340                lcd->lcd_last_xid : lcd->lcd_last_close_xid);
 341}
 342
 343/****************** superblock additional info *********************/
 344
 345struct ll_sb_info;
 346
 347struct lustre_sb_info {
 348        int                    lsi_flags;
 349        struct obd_device       *lsi_mgc;     /* mgc obd */
 350        struct lustre_mount_data *lsi_lmd;     /* mount command info */
 351        struct ll_sb_info       *lsi_llsbi;   /* add'l client sbi info */
 352        struct dt_device         *lsi_dt_dev;  /* dt device to access disk fs*/
 353        struct vfsmount   *lsi_srv_mnt; /* the one server mount */
 354        atomic_t              lsi_mounts;  /* references to the srv_mnt */
 355        char                      lsi_svname[MTI_NAME_MAXLEN];
 356        char                      lsi_osd_obdname[64];
 357        char                      lsi_osd_uuid[64];
 358        struct obd_export        *lsi_osd_exp;
 359        char                      lsi_osd_type[16];
 360        char                      lsi_fstype[16];
 361        struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
 362                                                  own backing_dev_info */
 363};
 364
 365#define LSI_UMOUNT_FAILOVER           0x00200000
 366#define LSI_BDI_INITIALIZED           0x00400000
 367
 368#define     s2lsi(sb)   ((struct lustre_sb_info *)((sb)->s_fs_info))
 369#define     s2lsi_nocast(sb) ((sb)->s_fs_info)
 370
 371#define     get_profile_name(sb)   (s2lsi(sb)->lsi_lmd->lmd_profile)
 372#define     get_mount_flags(sb)    (s2lsi(sb)->lsi_lmd->lmd_flags)
 373#define     get_mntdev_name(sb)    (s2lsi(sb)->lsi_lmd->lmd_dev)
 374
 375/****************** mount lookup info *********************/
 376
 377struct lustre_mount_info {
 378        char             *lmi_name;
 379        struct super_block   *lmi_sb;
 380        struct vfsmount      *lmi_mnt;
 381        struct list_head            lmi_list_chain;
 382};
 383
 384/****************** prototypes *********************/
 385
 386/* obd_mount.c */
 387
 388int lustre_start_mgc(struct super_block *sb);
 389void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
 390                                                  struct vfsmount *mnt));
 391void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
 392int lustre_common_put_super(struct super_block *sb);
 393
 394int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
 395
/** @} disk */

 398#endif /* _LUSTRE_DISK_H */
 399