linux/include/linux/ceph/osdmap.h
<<
>>
Prefs
   1#ifndef _FS_CEPH_OSDMAP_H
   2#define _FS_CEPH_OSDMAP_H
   3
   4#include <linux/rbtree.h>
   5#include <linux/ceph/types.h>
   6#include <linux/ceph/decode.h>
   7#include <linux/ceph/ceph_fs.h>
   8#include <linux/crush/crush.h>
   9
  10/*
  11 * The osd map describes the current membership of the osd cluster and
  12 * specifies the mapping of objects to placement groups and placement
  13 * groups to (sets of) osds.  That is, it completely specifies the
  14 * (desired) distribution of all data objects in the system at some
  15 * point in time.
  16 *
  17 * Each map version is identified by an epoch, which increases monotonically.
  18 *
  19 * The map can be updated either via an incremental map (diff) describing
  20 * the change between two successive epochs, or as a fully encoded map.
  21 */
  22struct ceph_pg {
  23        uint64_t pool;
  24        uint32_t seed;
  25};
  26
  27#define CEPH_POOL_FLAG_HASHPSPOOL  1
  28
  29struct ceph_pg_pool_info {
  30        struct rb_node node;
  31        s64 id;
  32        u8 type;
  33        u8 size;
  34        u8 crush_ruleset;
  35        u8 object_hash;
  36        u32 pg_num, pgp_num;
  37        int pg_num_mask, pgp_num_mask;
  38        u64 flags;
  39        char *name;
  40};
  41
  42struct ceph_object_locator {
  43        uint64_t pool;
  44        char *key;
  45};
  46
  47struct ceph_pg_mapping {
  48        struct rb_node node;
  49        struct ceph_pg pgid;
  50        int len;
  51        int osds[];
  52};
  53
  54struct ceph_osdmap {
  55        struct ceph_fsid fsid;
  56        u32 epoch;
  57        u32 mkfs_epoch;
  58        struct ceph_timespec created, modified;
  59
  60        u32 flags;         /* CEPH_OSDMAP_* */
  61
  62        u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
  63        u8 *osd_state;     /* CEPH_OSD_* */
  64        u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
  65        struct ceph_entity_addr *osd_addr;
  66
  67        struct rb_root pg_temp;
  68        struct rb_root pg_pools;
  69        u32 pool_max;
  70
  71        /* the CRUSH map specifies the mapping of placement groups to
  72         * the list of osds that store+replicate them. */
  73        struct crush_map *crush;
  74};
  75
  76/*
  77 * file layout helpers
  78 */
  79#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
  80#define ceph_file_layout_stripe_count(l) \
  81        ((__s32)le32_to_cpu((l).fl_stripe_count))
  82#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
  83#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
  84#define ceph_file_layout_object_su(l) \
  85        ((__s32)le32_to_cpu((l).fl_object_stripe_unit))
  86#define ceph_file_layout_pg_pool(l) \
  87        ((__s32)le32_to_cpu((l).fl_pg_pool))
  88
  89static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
  90{
  91        return le32_to_cpu(l->fl_stripe_unit) *
  92                le32_to_cpu(l->fl_stripe_count);
  93}
  94
  95/* "period" == bytes before i start on a new set of objects */
  96static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
  97{
  98        return le32_to_cpu(l->fl_object_size) *
  99                le32_to_cpu(l->fl_stripe_count);
 100}
 101
 102
 103static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
 104{
 105        return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
 106}
 107
 108static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
 109{
 110        return map && (map->flags & flag);
 111}
 112
 113extern char *ceph_osdmap_state_str(char *str, int len, int state);
 114
 115static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
 116                                                     int osd)
 117{
 118        if (osd >= map->max_osd)
 119                return NULL;
 120        return &map->osd_addr[osd];
 121}
 122
 123static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
 124{
 125        __u8 version;
 126
 127        if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
 128                pr_warning("incomplete pg encoding");
 129
 130                return -EINVAL;
 131        }
 132        version = ceph_decode_8(p);
 133        if (version > 1) {
 134                pr_warning("do not understand pg encoding %d > 1",
 135                        (int)version);
 136                return -EINVAL;
 137        }
 138
 139        pgid->pool = ceph_decode_64(p);
 140        pgid->seed = ceph_decode_32(p);
 141        *p += 4;        /* skip deprecated preferred value */
 142
 143        return 0;
 144}
 145
 146extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
 147extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 148                                            struct ceph_osdmap *map,
 149                                            struct ceph_messenger *msgr);
 150extern void ceph_osdmap_destroy(struct ceph_osdmap *map);
 151
 152/* calculate mapping of a file extent to an object */
 153extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
 154                                         u64 off, u64 len,
 155                                         u64 *bno, u64 *oxoff, u64 *oxlen);
 156
 157/* calculate mapping of object to a placement group */
 158extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
 159                          struct ceph_osdmap *osdmap, uint64_t pool);
 160extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
 161                               struct ceph_pg pgid,
 162                               int *acting);
 163extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
 164                                struct ceph_pg pgid);
 165
 166extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
 167extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
 168
 169#endif
 170