linux/include/linux/ceph/messenger.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef __FS_CEPH_MESSENGER_H
   3#define __FS_CEPH_MESSENGER_H
   4
   5#include <linux/bvec.h>
   6#include <linux/kref.h>
   7#include <linux/mutex.h>
   8#include <linux/net.h>
   9#include <linux/radix-tree.h>
  10#include <linux/uio.h>
  11#include <linux/workqueue.h>
  12#include <net/net_namespace.h>
  13
  14#include <linux/ceph/types.h>
  15#include <linux/ceph/buffer.h>
  16
  17struct ceph_msg;
  18struct ceph_connection;
  19
  20/*
  21 * Ceph defines these callbacks for handling connection events.
     *
     * A pointer to one of these tables is passed to ceph_con_init() along
     * with the connection it serves, and struct ceph_connection keeps it in
     * its ->ops member.
  22 */
  23struct ceph_connection_operations {
            /*
             * get/put pair.  NOTE(review): presumably takes and drops a
             * reference on whatever object owns the connection - confirm.
             */
  24        struct ceph_connection *(*get)(struct ceph_connection *);
  25        void (*put)(struct ceph_connection *);
  26
  27        /* handle an incoming message. */
  28        void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
  29
  30        /* authorize an outgoing connection */
  31        struct ceph_auth_handshake *(*get_authorizer) (
  32                                struct ceph_connection *con,
  33                               int *proto, int force_new);
  34        int (*add_authorizer_challenge)(struct ceph_connection *con,
  35                                        void *challenge_buf,
  36                                        int challenge_buf_len);
  37        int (*verify_authorizer_reply) (struct ceph_connection *con);
  38        int (*invalidate_authorizer)(struct ceph_connection *con);
  39
  40        /* there was some error on the socket (disconnect, whatever) */
  41        void (*fault) (struct ceph_connection *con);
  42
  43        /* a remote host has terminated a message exchange session, and
  44         * messages we sent (or they tried to send us) may be lost. */
  45        void (*peer_reset) (struct ceph_connection *con);
  46
            /*
             * NOTE(review): presumably supplies the message that the incoming
             * header @hdr is received into, with *skip as an out flag asking
             * the caller to discard the message instead - confirm.
             */
  47        struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
  48                                        struct ceph_msg_header *hdr,
  49                                        int *skip);
  50
  51        void (*reencode_message) (struct ceph_msg *msg);
  52
            /* NOTE(review): presumably message signing and verification hooks */
  53        int (*sign_message) (struct ceph_msg *msg);
  54        int (*check_message_signature) (struct ceph_msg *msg);
  55};
  56
  57/*
     * Expands to two printf-style arguments: the entity type name and the
     * entity number.  The number is the result of le64_to_cpu(), i.e. a
     * 64-bit value, so use format string %s%lld (plain %d is too narrow).
     * @n is expanded twice; pass an expression without side effects.
     */
  58#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
  59
  60struct ceph_messenger {
            /*
             * Messenger-wide state.  Connections refer back to their messenger
             * through ceph_connection.msgr.
             */
  61        struct ceph_entity_inst inst;    /* my name+address */
  62        struct ceph_entity_addr my_enc_addr; /* NOTE(review): presumably the encoded form of inst's address - confirm */
  63
  64        atomic_t stopping;      /* NOTE(review): presumably non-zero while shutting down - confirm */
  65        possible_net_t net;     /* NOTE(review): presumably the network namespace in use - confirm */
  66
  67        /*
  68         * the global_seq counts connections I (attempt to) initiate
  69         * in order to disambiguate certain connect race conditions.
  70         */
  71        u32 global_seq;
  72        spinlock_t global_seq_lock;     /* NOTE(review): presumably guards global_seq - confirm */
  73};
  74
  75enum ceph_msg_data_type {
            /*
             * Kind of payload described by one struct ceph_msg_data.
             *
             * CEPH_MSG_DATA_BIO is declared only when CONFIG_BLOCK is set, so
             * the numeric value of CEPH_MSG_DATA_BVECS differs between
             * configurations: always use the names, never raw numbers.
             */
  76        CEPH_MSG_DATA_NONE,     /* message contains no data payload */
  77        CEPH_MSG_DATA_PAGES,    /* data source/destination is a page array */
  78        CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */
  79#ifdef CONFIG_BLOCK
  80        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
  81#endif /* CONFIG_BLOCK */
  82        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
  83};
  84
  85static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
  86{
  87        /* true only for the payload kinds declared in this configuration */
  88        if (type == CEPH_MSG_DATA_NONE || type == CEPH_MSG_DATA_PAGES)
  89                return true;
  90        if (type == CEPH_MSG_DATA_PAGELIST)
  91                return true;
  92#ifdef CONFIG_BLOCK
  93        /* the bio kind is declared only when CONFIG_BLOCK is set */
  94        if (type == CEPH_MSG_DATA_BIO)
  95                return true;
  96#endif /* CONFIG_BLOCK */
  97
  98        return type == CEPH_MSG_DATA_BVECS;
  99}
 100
 101#ifdef CONFIG_BLOCK
 102
 103struct ceph_bio_iter {
            /* Position in a chain of bios linked through ->bi_next. */
 104        struct bio *bio;                /* bio currently being walked */
 105        struct bvec_iter iter;          /* position within @bio */
 106};
 107
 108#define __ceph_bio_iter_advance_step(it, n, STEP) do {                        \
            /* Consume @n bytes; STEP runs once per chunk, before @it moves. */   \
 109        unsigned int __n = (n), __cur_n;                                      \
            /* __cur_n (size of the current chunk) may be referenced by STEP */   \
 110                                                                              \
 111        while (__n) {                                                         \
                    /* asking for more than the bio chain holds is fatal */       \
 112                BUG_ON(!(it)->iter.bi_size);                                  \
                    /* chunk = rest of the current bio, capped at what is left */ \
 113                __cur_n = min((it)->iter.bi_size, __n);                       \
 114                (void)(STEP);                                                 \
 115                bio_advance_iter((it)->bio, &(it)->iter, __cur_n);            \
                    /* current bio used up: continue with the next one, if any */ \
 116                if (!(it)->iter.bi_size && (it)->bio->bi_next) {              \
 117                        dout("__ceph_bio_iter_advance_step next bio\n");      \
 118                        (it)->bio = (it)->bio->bi_next;                       \
 119                        (it)->iter = (it)->bio->bi_iter;                      \
 120                }                                                             \
 121                __n -= __cur_n;                                               \
 122        }                                                                     \
 123} while (0)
 124
 125/*
 126 * Advance @it by @n bytes.
     *
     * @it is used as a pointer with ->bio and ->iter members (a struct
     * ceph_bio_iter).  No per-chunk work is done: the step expression
     * handed to the helper is the constant 0.
 127 */
 128#define ceph_bio_iter_advance(it, n)                                          \
 129        __ceph_bio_iter_advance_step(it, n, 0)
 130
 131/*
 132 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
     *
     * BVEC_STEP can refer to the current segment as "bv", a struct bio_vec
     * declared by this macro.  The segments are walked with a private copy
     * of the iterator limited to the current chunk (__cur_n bytes); @it
     * itself is moved only by __ceph_bio_iter_advance_step().
 133 */
 134#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)                          \
 135        __ceph_bio_iter_advance_step(it, n, ({                                \
 136                struct bio_vec bv;                                            \
 137                struct bvec_iter __cur_iter;                                  \
 138                                                                              \
 139                __cur_iter = (it)->iter;                                      \
 140                __cur_iter.bi_size = __cur_n;                                 \
 141                __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
 142                        (void)(BVEC_STEP);                                    \
 143        }))
 144
 145#endif /* CONFIG_BLOCK */
 146
 147struct ceph_bvec_iter {
            /* Position in a bio_vec array. */
 148        struct bio_vec *bvecs;          /* array being walked */
 149        struct bvec_iter iter;          /* position within @bvecs */
 150};
 151
 152#define __ceph_bvec_iter_advance_step(it, n, STEP) do {                       \
            /* Evaluate STEP once, then move @it forward by @n bytes. */          \
            /* @n is expanded twice here: it must be free of side effects. */     \
 153        BUG_ON((n) > (it)->iter.bi_size);                                     \
 154        (void)(STEP);                                                         \
 155        bvec_iter_advance((it)->bvecs, &(it)->iter, (n));                     \
 156} while (0)
 157
 158/*
 159 * Advance @it by @n bytes.
     *
     * @it is used as a pointer with ->bvecs and ->iter members (a struct
     * ceph_bvec_iter).  @n must not exceed @it's ->iter.bi_size; the
     * helper BUGs otherwise.
 160 */
 161#define ceph_bvec_iter_advance(it, n)                                         \
 162        __ceph_bvec_iter_advance_step(it, n, 0)
 163
 164/*
 165 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
     *
     * BVEC_STEP can refer to the current segment as "bv", a struct bio_vec
     * declared by this macro.  The segments are walked with a private copy
     * of the iterator limited to @n bytes.  @n is expanded several times,
     * so it must be free of side effects.
 166 */
 167#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)                         \
 168        __ceph_bvec_iter_advance_step(it, n, ({                               \
 169                struct bio_vec bv;                                            \
 170                struct bvec_iter __cur_iter;                                  \
 171                                                                              \
 172                __cur_iter = (it)->iter;                                      \
 173                __cur_iter.bi_size = (n);                                     \
 174                for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)        \
 175                        (void)(BVEC_STEP);                                    \
 176        }))
 177
 178#define ceph_bvec_iter_shorten(it, n) do {                                    \
            /* Limit @it to its next @n bytes; @n above what is left is a BUG. */ \
 179        BUG_ON((n) > (it)->iter.bi_size);                                     \
 180        (it)->iter.bi_size = (n);                                             \
 181} while (0)
 182
 183struct ceph_msg_data {
            /*
             * One data payload item of a message, chained on ceph_msg->data
             * through @links.  NOTE(review): @type presumably selects which
             * union member is in use - confirm against the users.
             */
 184        struct list_head                links;  /* ceph_msg->data */
 185        enum ceph_msg_data_type         type;
 186        union {
 187#ifdef CONFIG_BLOCK
 188                struct {                        /* bio list */
 189                        struct ceph_bio_iter    bio_pos;
 190                        u32                     bio_length;
 191                };
 192#endif /* CONFIG_BLOCK */
 193                struct ceph_bvec_iter   bvec_pos;       /* bio_vec array */
 194                struct {                        /* page array */
 195                        struct page     **pages;        /* NOT OWNER. */
 196                        size_t          length;         /* total # bytes */
 197                        unsigned int    alignment;      /* first page */
 198                };
 199                struct ceph_pagelist    *pagelist;      /* pagelist */
 200        };
 201};
 202
 203struct ceph_msg_data_cursor {
            /*
             * Read/write position within the data items of one message.  The
             * union holds the position in a form that matches the kind of the
             * current item.
             */
 204        size_t                  total_resid;    /* across all data items */
 205        struct list_head        *data_head;     /* = &ceph_msg->data */
 206
 207        struct ceph_msg_data    *data;          /* current data item */
 208        size_t                  resid;          /* bytes not yet consumed */
 209        bool                    last_piece;     /* current is last piece */
 210        bool                    need_crc;       /* crc update needed */
 211        union {
 212#ifdef CONFIG_BLOCK
 213                struct ceph_bio_iter    bio_iter;       /* bio list */
 214#endif /* CONFIG_BLOCK */
 215                struct bvec_iter        bvec_iter;      /* bio_vec array */
 216                struct {                                /* pages */
 217                        unsigned int    page_offset;    /* offset in page */
                            /*
                             * NOTE(review): unsigned short counters - a page
                             * array longer than USHRT_MAX entries cannot be
                             * represented here; confirm callers stay below.
                             */
 218                        unsigned short  page_index;     /* index in array */
 219                        unsigned short  page_count;     /* pages in array */
 220                };
 221                struct {                                /* pagelist */
 222                        struct page     *page;          /* page from list */
 223                        size_t          offset;         /* bytes from list */
 224                };
 225        };
 226};
 227
 228/*
 229 * A single message.  It contains a header (src, dest, message type, etc.),
 230 * footer (crc values, mainly), a "front" message body, an optional
 231 * "middle" buffer, and possibly a data payload made of one or more
     * struct ceph_msg_data items (page array, pagelist, bio list or
     * bio_vec array - see enum ceph_msg_data_type).
 232 */
 233struct ceph_msg {
 234        struct ceph_msg_header hdr;     /* header */
 235        union {
 236                struct ceph_msg_footer footer;          /* footer */
 237                struct ceph_msg_footer_old old_footer;  /* old format footer */
 238        };
 239        struct kvec front;              /* unaligned blobs of message */
 240        struct ceph_buffer *middle;
 241
 242        size_t                          data_length;    /* NOTE(review): presumably total bytes over all data items - confirm */
 243        struct list_head                data;           /* struct ceph_msg_data items, via ->links */
 244        struct ceph_msg_data_cursor     cursor;         /* position within @data */
 245
 246        struct ceph_connection *con;
 247        struct list_head list_head;     /* links for connection lists */
 248
 249        struct kref kref;               /* NOTE(review): presumably driven by ceph_msg_get()/ceph_msg_put() - confirm */
 250        bool more_to_follow;
 251        bool needs_out_seq;
 252        int front_alloc_len;
 253        unsigned long ack_stamp;        /* tx: when we were acked */
 254
 255        struct ceph_msgpool *pool;
 256};
 257
 258/* ceph connection fault delay defaults, for exponential backoff */
 259#define BASE_DELAY_INTERVAL     (HZ/2)          /* half a second, in jiffies */
 260#define MAX_DELAY_INTERVAL      (5 * 60 * HZ)   /* five minutes, in jiffies */
 261
 262/*
 263 * A single connection with another host.
 264 *
 265 * We maintain a queue of outgoing messages, and some session state to
 266 * ensure that we can preserve the lossless, ordered delivery of
 267 * messages in the case of a TCP disconnect.
 268 */
 269struct ceph_connection {
 270        void *private;          /* same name and type as ceph_con_init()'s @private */
 271
 272        const struct ceph_connection_operations *ops;   /* owner callbacks */
 273
 274        struct ceph_messenger *msgr;    /* messenger this connection belongs to */
 275
 276        atomic_t sock_state;
 277        struct socket *sock;
 278        struct ceph_entity_addr peer_addr; /* peer address */
 279        struct ceph_entity_addr peer_addr_for_me;
 280
 281        unsigned long flags;
 282        unsigned long state;
 283        const char *error_msg;  /* error message, if any */
 284
 285        struct ceph_entity_name peer_name; /* peer name */
 286
 287        u64 peer_features;
 288        u32 connect_seq;      /* identify the most recent connection
 289                                 attempt for this connection, client */
 290        u32 peer_global_seq;  /* peer's global seq for this connection */
 291
 292        struct ceph_auth_handshake *auth;
 293        int auth_retry;       /* true if we need a newer authorizer */
 294
 295        struct mutex mutex;
 296
 297        /* out queue */
 298        struct list_head out_queue;
 299        struct list_head out_sent;   /* sending or sent but unacked */
 300        u64 out_seq;                 /* last message queued for send */
 301
 302        u64 in_seq, in_seq_acked;  /* last message received, acked */
 303
 304        /* connection negotiation temps */
 305        char in_banner[CEPH_BANNER_MAX_LEN];
 306        struct ceph_msg_connect out_connect;
 307        struct ceph_msg_connect_reply in_reply;
 308        struct ceph_entity_addr actual_peer_addr;
 309
 310        /* message out temps */
 311        struct ceph_msg_header out_hdr;
 312        struct ceph_msg *out_msg;        /* sending message (== tail of
 313                                            out_sent) */
 314        bool out_msg_done;
 315
 316        struct kvec out_kvec[8],         /* sending header/footer data */
 317                *out_kvec_cur;           /* NOTE(review): presumably the next entry of out_kvec to send - confirm */
 318        int out_kvec_left;   /* kvecs left in out_kvec */
 319        int out_skip;        /* skip this many bytes */
 320        int out_kvec_bytes;  /* total bytes left */
 321        int out_more;        /* there is more data after the kvecs */
 322        __le64 out_temp_ack; /* for writing an ack */
 323        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
 324                                                     stamp */
 325
 326        /* message in temps */
 327        struct ceph_msg_header in_hdr;
 328        struct ceph_msg *in_msg;
 329        u32 in_front_crc, in_middle_crc, in_data_crc;  /* calculated crc */
 330
 331        char in_tag;         /* protocol control byte */
 332        int in_base_pos;     /* bytes read */
 333        __le64 in_temp_ack;  /* for reading an ack */
 334
 335        struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 336
 337        struct delayed_work work;           /* send|recv work */
            /* NOTE(review): presumably bounded by BASE_/MAX_DELAY_INTERVAL - confirm */
 338        unsigned long       delay;          /* current delay interval */
 339};
 340
 341
 342extern const char *ceph_pr_addr(const struct sockaddr_storage *ss);
    /*
     * NOTE(review): ceph_pr_addr() returns a string pointer without taking a
     * caller buffer; where that string lives and how long it stays valid is
     * not visible in this header - confirm before keeping the pointer.
     */
 343extern int ceph_parse_ips(const char *c, const char *end,
 344                          struct ceph_entity_addr *addr,
 345                          int max_count, int *count);
 346
 347
    /* messenger subsystem entry points (no per-instance argument) */
 348extern int ceph_msgr_init(void);
 349extern void ceph_msgr_exit(void);
 350extern void ceph_msgr_flush(void);
 351
    /* per-messenger setup and teardown */
 352extern void ceph_messenger_init(struct ceph_messenger *msgr,
 353                                struct ceph_entity_addr *myaddr);
 354extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 355
    /* connection handling */
 356extern void ceph_con_init(struct ceph_connection *con, void *private,
 357                        const struct ceph_connection_operations *ops,
 358                        struct ceph_messenger *msgr);
 359extern void ceph_con_open(struct ceph_connection *con,
 360                          __u8 entity_type, __u64 entity_num,
 361                          struct ceph_entity_addr *addr);
 362extern bool ceph_con_opened(struct ceph_connection *con);
 363extern void ceph_con_close(struct ceph_connection *con);
 364extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
 365
 366extern void ceph_msg_revoke(struct ceph_msg *msg);
 367extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
 368
 369extern void ceph_con_keepalive(struct ceph_connection *con);
 370extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
 371                                       unsigned long interval);
 372
    /*
     * Attach a data payload to @msg; there is one helper per payload kind of
     * enum ceph_msg_data_type, and the bio variant exists only with
     * CONFIG_BLOCK.
     * NOTE(review): @alignment is size_t here while struct ceph_msg_data
     * stores it as unsigned int - confirm the narrowing is harmless.
     */
 373extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 374                                size_t length, size_t alignment);
 375extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 376                                struct ceph_pagelist *pagelist);
 377#ifdef CONFIG_BLOCK
 378void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 379                           u32 length);
 380#endif /* CONFIG_BLOCK */
 381void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 382                             struct ceph_bvec_iter *bvec_pos);
 383
 384extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 385                                     bool can_fail);
 386
    /* NOTE(review): presumably take/drop a reference on ceph_msg.kref - confirm */
 387extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg);
 388extern void ceph_msg_put(struct ceph_msg *msg);
 389
 390extern void ceph_msg_dump(struct ceph_msg *msg);
 391
 392#endif
 393