linux/include/linux/ceph/osd_client.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _FS_CEPH_OSD_CLIENT_H
   3#define _FS_CEPH_OSD_CLIENT_H
   4
   5#include <linux/bitrev.h>
   6#include <linux/completion.h>
   7#include <linux/kref.h>
   8#include <linux/mempool.h>
   9#include <linux/rbtree.h>
  10#include <linux/refcount.h>
  11#include <linux/ktime.h>
  12
  13#include <linux/ceph/types.h>
  14#include <linux/ceph/osdmap.h>
  15#include <linux/ceph/messenger.h>
  16#include <linux/ceph/msgpool.h>
  17#include <linux/ceph/auth.h>
  18#include <linux/ceph/pagelist.h>
  19
  20struct ceph_msg;
  21struct ceph_snap_context;
  22struct ceph_osd_request;
  23struct ceph_osd_client;
  24
  25/*
  26 * completion callback for async writepages
  27 */
  28typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
  29
  30#define CEPH_HOMELESS_OSD       -1
  31
  32/* a given osd we're communicating with */
  33struct ceph_osd {
  34        refcount_t o_ref;
  35        struct ceph_osd_client *o_osdc;
  36        int o_osd;
  37        int o_incarnation;
  38        struct rb_node o_node;
  39        struct ceph_connection o_con;
  40        struct rb_root o_requests;
  41        struct rb_root o_linger_requests;
  42        struct rb_root o_backoff_mappings;
  43        struct rb_root o_backoffs_by_id;
  44        struct list_head o_osd_lru;
  45        struct ceph_auth_handshake o_auth;
  46        unsigned long lru_ttl;
  47        struct list_head o_keepalive_item;
  48        struct mutex lock;
  49};
  50
  51#define CEPH_OSD_SLAB_OPS       2
  52#define CEPH_OSD_MAX_OPS        16
  53
  54enum ceph_osd_data_type {
  55        CEPH_OSD_DATA_TYPE_NONE = 0,
  56        CEPH_OSD_DATA_TYPE_PAGES,
  57        CEPH_OSD_DATA_TYPE_PAGELIST,
  58#ifdef CONFIG_BLOCK
  59        CEPH_OSD_DATA_TYPE_BIO,
  60#endif /* CONFIG_BLOCK */
  61        CEPH_OSD_DATA_TYPE_BVECS,
  62};
  63
  64struct ceph_osd_data {
  65        enum ceph_osd_data_type type;
  66        union {
  67                struct {
  68                        struct page     **pages;
  69                        u64             length;
  70                        u32             alignment;
  71                        bool            pages_from_pool;
  72                        bool            own_pages;
  73                };
  74                struct ceph_pagelist    *pagelist;
  75#ifdef CONFIG_BLOCK
  76                struct {
  77                        struct ceph_bio_iter    bio_pos;
  78                        u32                     bio_length;
  79                };
  80#endif /* CONFIG_BLOCK */
  81                struct {
  82                        struct ceph_bvec_iter   bvec_pos;
  83                        u32                     num_bvecs;
  84                };
  85        };
  86};
  87
  88struct ceph_osd_req_op {
  89        u16 op;           /* CEPH_OSD_OP_* */
  90        u32 flags;        /* CEPH_OSD_OP_FLAG_* */
  91        u32 indata_len;   /* request */
  92        u32 outdata_len;  /* reply */
  93        s32 rval;
  94
  95        union {
  96                struct ceph_osd_data raw_data_in;
  97                struct {
  98                        u64 offset, length;
  99                        u64 truncate_size;
 100                        u32 truncate_seq;
 101                        struct ceph_osd_data osd_data;
 102                } extent;
 103                struct {
 104                        u32 name_len;
 105                        u32 value_len;
 106                        __u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
 107                        __u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
 108                        struct ceph_osd_data osd_data;
 109                } xattr;
 110                struct {
 111                        const char *class_name;
 112                        const char *method_name;
 113                        struct ceph_osd_data request_info;
 114                        struct ceph_osd_data request_data;
 115                        struct ceph_osd_data response_data;
 116                        __u8 class_len;
 117                        __u8 method_len;
 118                        u32 indata_len;
 119                } cls;
 120                struct {
 121                        u64 cookie;
 122                        __u8 op;           /* CEPH_OSD_WATCH_OP_ */
 123                        u32 gen;
 124                } watch;
 125                struct {
 126                        struct ceph_osd_data request_data;
 127                } notify_ack;
 128                struct {
 129                        u64 cookie;
 130                        struct ceph_osd_data request_data;
 131                        struct ceph_osd_data response_data;
 132                } notify;
 133                struct {
 134                        struct ceph_osd_data response_data;
 135                } list_watchers;
 136                struct {
 137                        u64 expected_object_size;
 138                        u64 expected_write_size;
 139                        u32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
 140                } alloc_hint;
 141                struct {
 142                        u64 snapid;
 143                        u64 src_version;
 144                        u8 flags;
 145                        u32 src_fadvise_flags;
 146                        struct ceph_osd_data osd_data;
 147                } copy_from;
 148        };
 149};
 150
 151struct ceph_osd_request_target {
 152        struct ceph_object_id base_oid;
 153        struct ceph_object_locator base_oloc;
 154        struct ceph_object_id target_oid;
 155        struct ceph_object_locator target_oloc;
 156
 157        struct ceph_pg pgid;               /* last raw pg we mapped to */
 158        struct ceph_spg spgid;             /* last actual spg we mapped to */
 159        u32 pg_num;
 160        u32 pg_num_mask;
 161        struct ceph_osds acting;
 162        struct ceph_osds up;
 163        int size;
 164        int min_size;
 165        bool sort_bitwise;
 166        bool recovery_deletes;
 167
 168        unsigned int flags;                /* CEPH_OSD_FLAG_* */
 169        bool used_replica;
 170        bool paused;
 171
 172        u32 epoch;
 173        u32 last_force_resend;
 174
 175        int osd;
 176};
 177
 178/* an in-flight request */
 179struct ceph_osd_request {
 180        u64             r_tid;              /* unique for this client */
 181        struct rb_node  r_node;
 182        struct rb_node  r_mc_node;          /* map check */
 183        struct work_struct r_complete_work;
 184        struct ceph_osd *r_osd;
 185
 186        struct ceph_osd_request_target r_t;
 187#define r_base_oid      r_t.base_oid
 188#define r_base_oloc     r_t.base_oloc
 189#define r_flags         r_t.flags
 190
 191        struct ceph_msg  *r_request, *r_reply;
 192        u32               r_sent;      /* >0 if r_request is sending/sent */
 193
 194        /* request osd ops array  */
 195        unsigned int            r_num_ops;
 196
 197        int               r_result;
 198
 199        struct ceph_osd_client *r_osdc;
 200        struct kref       r_kref;
 201        bool              r_mempool;
 202        struct completion r_completion;       /* private to osd_client.c */
 203        ceph_osdc_callback_t r_callback;
 204
 205        struct inode *r_inode;                /* for use by callbacks */
 206        struct list_head r_private_item;      /* ditto */
 207        void *r_priv;                         /* ditto */
 208
 209        /* set by submitter */
 210        u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
 211        struct ceph_snap_context *r_snapc;    /* for writes */
 212        struct timespec64 r_mtime;            /* ditto */
 213        u64 r_data_offset;                    /* ditto */
 214        bool r_linger;                        /* don't resend on failure */
 215
 216        /* internal */
 217        unsigned long r_stamp;                /* jiffies, send or check time */
 218        unsigned long r_start_stamp;          /* jiffies */
 219        ktime_t r_start_latency;              /* ktime_t */
 220        ktime_t r_end_latency;                /* ktime_t */
 221        int r_attempts;
 222        u32 r_map_dne_bound;
 223
 224        struct ceph_osd_req_op r_ops[];
 225};
 226
 227struct ceph_request_redirect {
 228        struct ceph_object_locator oloc;
 229};
 230
 231/*
 232 * osd request identifier
 233 *
 234 * caller name + incarnation# + tid to unique identify this request
 235 */
 236struct ceph_osd_reqid {
 237        struct ceph_entity_name name;
 238        __le64 tid;
 239        __le32 inc;
 240} __packed;
 241
 242struct ceph_blkin_trace_info {
 243        __le64 trace_id;
 244        __le64 span_id;
 245        __le64 parent_span_id;
 246} __packed;
 247
 248typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
 249                                 u64 notifier_id, void *data, size_t data_len);
 250typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
 251
 252struct ceph_osd_linger_request {
 253        struct ceph_osd_client *osdc;
 254        u64 linger_id;
 255        bool committed;
 256        bool is_watch;                  /* watch or notify */
 257
 258        struct ceph_osd *osd;
 259        struct ceph_osd_request *reg_req;
 260        struct ceph_osd_request *ping_req;
 261        unsigned long ping_sent;
 262        unsigned long watch_valid_thru;
 263        struct list_head pending_lworks;
 264
 265        struct ceph_osd_request_target t;
 266        u32 map_dne_bound;
 267
 268        struct timespec64 mtime;
 269
 270        struct kref kref;
 271        struct mutex lock;
 272        struct rb_node node;            /* osd */
 273        struct rb_node osdc_node;       /* osdc */
 274        struct rb_node mc_node;         /* map check */
 275        struct list_head scan_item;
 276
 277        struct completion reg_commit_wait;
 278        struct completion notify_finish_wait;
 279        int reg_commit_error;
 280        int notify_finish_error;
 281        int last_error;
 282
 283        u32 register_gen;
 284        u64 notify_id;
 285
 286        rados_watchcb2_t wcb;
 287        rados_watcherrcb_t errcb;
 288        void *data;
 289
 290        struct page ***preply_pages;
 291        size_t *preply_len;
 292};
 293
 294struct ceph_watch_item {
 295        struct ceph_entity_name name;
 296        u64 cookie;
 297        struct ceph_entity_addr addr;
 298};
 299
 300struct ceph_spg_mapping {
 301        struct rb_node node;
 302        struct ceph_spg spgid;
 303
 304        struct rb_root backoffs;
 305};
 306
 307struct ceph_hobject_id {
 308        void *key;
 309        size_t key_len;
 310        void *oid;
 311        size_t oid_len;
 312        u64 snapid;
 313        u32 hash;
 314        u8 is_max;
 315        void *nspace;
 316        size_t nspace_len;
 317        s64 pool;
 318
 319        /* cache */
 320        u32 hash_reverse_bits;
 321};
 322
 323static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
 324{
 325        hoid->hash_reverse_bits = bitrev32(hoid->hash);
 326}
 327
 328/*
 329 * PG-wide backoff: [begin, end)
 330 * per-object backoff: begin == end
 331 */
 332struct ceph_osd_backoff {
 333        struct rb_node spg_node;
 334        struct rb_node id_node;
 335
 336        struct ceph_spg spgid;
 337        u64 id;
 338        struct ceph_hobject_id *begin;
 339        struct ceph_hobject_id *end;
 340};
 341
 342#define CEPH_LINGER_ID_START    0xffff000000000000ULL
 343
 344struct ceph_osd_client {
 345        struct ceph_client     *client;
 346
 347        struct ceph_osdmap     *osdmap;       /* current map */
 348        struct rw_semaphore    lock;
 349
 350        struct rb_root         osds;          /* osds */
 351        struct list_head       osd_lru;       /* idle osds */
 352        spinlock_t             osd_lru_lock;
 353        u32                    epoch_barrier;
 354        struct ceph_osd        homeless_osd;
 355        atomic64_t             last_tid;      /* tid of last request */
 356        u64                    last_linger_id;
 357        struct rb_root         linger_requests; /* lingering requests */
 358        struct rb_root         map_checks;
 359        struct rb_root         linger_map_checks;
 360        atomic_t               num_requests;
 361        atomic_t               num_homeless;
 362        int                    abort_err;
 363        struct delayed_work    timeout_work;
 364        struct delayed_work    osds_timeout_work;
 365#ifdef CONFIG_DEBUG_FS
 366        struct dentry          *debugfs_file;
 367#endif
 368
 369        mempool_t              *req_mempool;
 370
 371        struct ceph_msgpool     msgpool_op;
 372        struct ceph_msgpool     msgpool_op_reply;
 373
 374        struct workqueue_struct *notify_wq;
 375        struct workqueue_struct *completion_wq;
 376};
 377
 378static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
 379{
 380        return osdc->osdmap->flags & flag;
 381}
 382
 383extern int ceph_osdc_setup(void);
 384extern void ceph_osdc_cleanup(void);
 385
 386extern int ceph_osdc_init(struct ceph_osd_client *osdc,
 387                          struct ceph_client *client);
 388extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
 389extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
 390
 391extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 392                                   struct ceph_msg *msg);
 393extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 394                                 struct ceph_msg *msg);
 395void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 396void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
 397void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
 398
 399#define osd_req_op_data(oreq, whch, typ, fld)                           \
 400({                                                                      \
 401        struct ceph_osd_request *__oreq = (oreq);                       \
 402        unsigned int __whch = (whch);                                   \
 403        BUG_ON(__whch >= __oreq->r_num_ops);                            \
 404        &__oreq->r_ops[__whch].typ.fld;                                 \
 405})
 406
 407struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
 408                            unsigned int which, u16 opcode, u32 flags);
 409
 410extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
 411                                        unsigned int which,
 412                                        struct page **pages, u64 length,
 413                                        u32 alignment, bool pages_from_pool,
 414                                        bool own_pages);
 415
 416extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
 417                                        unsigned int which, u16 opcode,
 418                                        u64 offset, u64 length,
 419                                        u64 truncate_size, u32 truncate_seq);
 420extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
 421                                        unsigned int which, u64 length);
 422extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
 423                                       unsigned int which, u64 offset_inc);
 424
 425extern struct ceph_osd_data *osd_req_op_extent_osd_data(
 426                                        struct ceph_osd_request *osd_req,
 427                                        unsigned int which);
 428
 429extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
 430                                        unsigned int which,
 431                                        struct page **pages, u64 length,
 432                                        u32 alignment, bool pages_from_pool,
 433                                        bool own_pages);
 434extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
 435                                        unsigned int which,
 436                                        struct ceph_pagelist *pagelist);
 437#ifdef CONFIG_BLOCK
 438void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
 439                                    unsigned int which,
 440                                    struct ceph_bio_iter *bio_pos,
 441                                    u32 bio_length);
 442#endif /* CONFIG_BLOCK */
 443void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
 444                                      unsigned int which,
 445                                      struct bio_vec *bvecs, u32 num_bvecs,
 446                                      u32 bytes);
 447void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 448                                         unsigned int which,
 449                                         struct ceph_bvec_iter *bvec_pos);
 450
 451extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
 452                                        unsigned int which,
 453                                        struct ceph_pagelist *pagelist);
 454extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
 455                                        unsigned int which,
 456                                        struct page **pages, u64 length,
 457                                        u32 alignment, bool pages_from_pool,
 458                                        bool own_pages);
 459void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
 460                                       unsigned int which,
 461                                       struct bio_vec *bvecs, u32 num_bvecs,
 462                                       u32 bytes);
 463extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 464                                        unsigned int which,
 465                                        struct page **pages, u64 length,
 466                                        u32 alignment, bool pages_from_pool,
 467                                        bool own_pages);
 468int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
 469                        const char *class, const char *method);
 470extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 471                                 u16 opcode, const char *name, const void *value,
 472                                 size_t size, u8 cmp_op, u8 cmp_mode);
 473extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
 474                                       unsigned int which,
 475                                       u64 expected_object_size,
 476                                       u64 expected_write_size,
 477                                       u32 flags);
 478
 479extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 480                                               struct ceph_snap_context *snapc,
 481                                               unsigned int num_ops,
 482                                               bool use_mempool,
 483                                               gfp_t gfp_flags);
 484int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
 485
 486extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
 487                                      struct ceph_file_layout *layout,
 488                                      struct ceph_vino vino,
 489                                      u64 offset, u64 *len,
 490                                      unsigned int which, int num_ops,
 491                                      int opcode, int flags,
 492                                      struct ceph_snap_context *snapc,
 493                                      u32 truncate_seq, u64 truncate_size,
 494                                      bool use_mempool);
 495
 496extern void ceph_osdc_get_request(struct ceph_osd_request *req);
 497extern void ceph_osdc_put_request(struct ceph_osd_request *req);
 498
 499extern int ceph_osdc_start_request(struct ceph_osd_client *osdc,
 500                                   struct ceph_osd_request *req,
 501                                   bool nofail);
 502extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
 503extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
 504                                  struct ceph_osd_request *req);
 505extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
 506
 507extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
 508void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
 509
 510int ceph_osdc_call(struct ceph_osd_client *osdc,
 511                   struct ceph_object_id *oid,
 512                   struct ceph_object_locator *oloc,
 513                   const char *class, const char *method,
 514                   unsigned int flags,
 515                   struct page *req_page, size_t req_len,
 516                   struct page **resp_pages, size_t *resp_len);
 517
 518int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
 519                        u64 src_snapid, u64 src_version,
 520                        struct ceph_object_id *src_oid,
 521                        struct ceph_object_locator *src_oloc,
 522                        u32 src_fadvise_flags,
 523                        struct ceph_object_id *dst_oid,
 524                        struct ceph_object_locator *dst_oloc,
 525                        u32 dst_fadvise_flags,
 526                        u32 truncate_seq, u64 truncate_size,
 527                        u8 copy_from_flags);
 528
 529/* watch/notify */
 530struct ceph_osd_linger_request *
 531ceph_osdc_watch(struct ceph_osd_client *osdc,
 532                struct ceph_object_id *oid,
 533                struct ceph_object_locator *oloc,
 534                rados_watchcb2_t wcb,
 535                rados_watcherrcb_t errcb,
 536                void *data);
 537int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
 538                      struct ceph_osd_linger_request *lreq);
 539
 540int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
 541                         struct ceph_object_id *oid,
 542                         struct ceph_object_locator *oloc,
 543                         u64 notify_id,
 544                         u64 cookie,
 545                         void *payload,
 546                         u32 payload_len);
 547int ceph_osdc_notify(struct ceph_osd_client *osdc,
 548                     struct ceph_object_id *oid,
 549                     struct ceph_object_locator *oloc,
 550                     void *payload,
 551                     u32 payload_len,
 552                     u32 timeout,
 553                     struct page ***preply_pages,
 554                     size_t *preply_len);
 555int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
 556                          struct ceph_osd_linger_request *lreq);
 557int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
 558                            struct ceph_object_id *oid,
 559                            struct ceph_object_locator *oloc,
 560                            struct ceph_watch_item **watchers,
 561                            u32 *num_watchers);
 562#endif
 563
 564