linux/include/linux/ceph/messenger.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef __FS_CEPH_MESSENGER_H
   3#define __FS_CEPH_MESSENGER_H
   4
   5#include <linux/bvec.h>
   6#include <linux/kref.h>
   7#include <linux/mutex.h>
   8#include <linux/net.h>
   9#include <linux/radix-tree.h>
  10#include <linux/uio.h>
  11#include <linux/workqueue.h>
  12#include <net/net_namespace.h>
  13
  14#include <linux/ceph/types.h>
  15#include <linux/ceph/buffer.h>
  16
  17struct ceph_msg;
  18struct ceph_connection;
  19
  20/*
  21 * Ceph defines these callbacks for handling connection events.
  22 */
  23struct ceph_connection_operations {
  24        struct ceph_connection *(*get)(struct ceph_connection *);
  25        void (*put)(struct ceph_connection *);
  26
  27        /* handle an incoming message. */
  28        void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
  29
  30        /* authorize an outgoing connection */
  31        struct ceph_auth_handshake *(*get_authorizer) (
  32                                struct ceph_connection *con,
  33                               int *proto, int force_new);
  34        int (*add_authorizer_challenge)(struct ceph_connection *con,
  35                                        void *challenge_buf,
  36                                        int challenge_buf_len);
  37        int (*verify_authorizer_reply) (struct ceph_connection *con);
  38        int (*invalidate_authorizer)(struct ceph_connection *con);
  39
  40        /* there was some error on the socket (disconnect, whatever) */
  41        void (*fault) (struct ceph_connection *con);
  42
  43        /* a remote host as terminated a message exchange session, and messages
  44         * we sent (or they tried to send us) may be lost. */
  45        void (*peer_reset) (struct ceph_connection *con);
  46
  47        struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
  48                                        struct ceph_msg_header *hdr,
  49                                        int *skip);
  50
  51        void (*reencode_message) (struct ceph_msg *msg);
  52
  53        int (*sign_message) (struct ceph_msg *msg);
  54        int (*check_message_signature) (struct ceph_msg *msg);
  55};
  56
  57/* use format string %s%d */
  58#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
  59
  60struct ceph_messenger {
  61        struct ceph_entity_inst inst;    /* my name+address */
  62        struct ceph_entity_addr my_enc_addr;
  63
  64        atomic_t stopping;
  65        possible_net_t net;
  66
  67        /*
  68         * the global_seq counts connections i (attempt to) initiate
  69         * in order to disambiguate certain connect race conditions.
  70         */
  71        u32 global_seq;
  72        spinlock_t global_seq_lock;
  73};
  74
  75enum ceph_msg_data_type {
  76        CEPH_MSG_DATA_NONE,     /* message contains no data payload */
  77        CEPH_MSG_DATA_PAGES,    /* data source/destination is a page array */
  78        CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */
  79#ifdef CONFIG_BLOCK
  80        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
  81#endif /* CONFIG_BLOCK */
  82        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
  83};
  84
  85#ifdef CONFIG_BLOCK
  86
  87struct ceph_bio_iter {
  88        struct bio *bio;
  89        struct bvec_iter iter;
  90};
  91
  92#define __ceph_bio_iter_advance_step(it, n, STEP) do {                        \
  93        unsigned int __n = (n), __cur_n;                                      \
  94                                                                              \
  95        while (__n) {                                                         \
  96                BUG_ON(!(it)->iter.bi_size);                                  \
  97                __cur_n = min((it)->iter.bi_size, __n);                       \
  98                (void)(STEP);                                                 \
  99                bio_advance_iter((it)->bio, &(it)->iter, __cur_n);            \
 100                if (!(it)->iter.bi_size && (it)->bio->bi_next) {              \
 101                        dout("__ceph_bio_iter_advance_step next bio\n");      \
 102                        (it)->bio = (it)->bio->bi_next;                       \
 103                        (it)->iter = (it)->bio->bi_iter;                      \
 104                }                                                             \
 105                __n -= __cur_n;                                               \
 106        }                                                                     \
 107} while (0)
 108
 109/*
 110 * Advance @it by @n bytes.
 111 */
 112#define ceph_bio_iter_advance(it, n)                                          \
 113        __ceph_bio_iter_advance_step(it, n, 0)
 114
 115/*
 116 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
 117 */
 118#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)                          \
 119        __ceph_bio_iter_advance_step(it, n, ({                                \
 120                struct bio_vec bv;                                            \
 121                struct bvec_iter __cur_iter;                                  \
 122                                                                              \
 123                __cur_iter = (it)->iter;                                      \
 124                __cur_iter.bi_size = __cur_n;                                 \
 125                __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
 126                        (void)(BVEC_STEP);                                    \
 127        }))
 128
 129#endif /* CONFIG_BLOCK */
 130
 131struct ceph_bvec_iter {
 132        struct bio_vec *bvecs;
 133        struct bvec_iter iter;
 134};
 135
 136#define __ceph_bvec_iter_advance_step(it, n, STEP) do {                       \
 137        BUG_ON((n) > (it)->iter.bi_size);                                     \
 138        (void)(STEP);                                                         \
 139        bvec_iter_advance((it)->bvecs, &(it)->iter, (n));                     \
 140} while (0)
 141
 142/*
 143 * Advance @it by @n bytes.
 144 */
 145#define ceph_bvec_iter_advance(it, n)                                         \
 146        __ceph_bvec_iter_advance_step(it, n, 0)
 147
 148/*
 149 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
 150 */
 151#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)                         \
 152        __ceph_bvec_iter_advance_step(it, n, ({                               \
 153                struct bio_vec bv;                                            \
 154                struct bvec_iter __cur_iter;                                  \
 155                                                                              \
 156                __cur_iter = (it)->iter;                                      \
 157                __cur_iter.bi_size = (n);                                     \
 158                for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)        \
 159                        (void)(BVEC_STEP);                                    \
 160        }))
 161
 162#define ceph_bvec_iter_shorten(it, n) do {                                    \
 163        BUG_ON((n) > (it)->iter.bi_size);                                     \
 164        (it)->iter.bi_size = (n);                                             \
 165} while (0)
 166
 167struct ceph_msg_data {
 168        enum ceph_msg_data_type         type;
 169        union {
 170#ifdef CONFIG_BLOCK
 171                struct {
 172                        struct ceph_bio_iter    bio_pos;
 173                        u32                     bio_length;
 174                };
 175#endif /* CONFIG_BLOCK */
 176                struct ceph_bvec_iter   bvec_pos;
 177                struct {
 178                        struct page     **pages;
 179                        size_t          length;         /* total # bytes */
 180                        unsigned int    alignment;      /* first page */
 181                        bool            own_pages;
 182                };
 183                struct ceph_pagelist    *pagelist;
 184        };
 185};
 186
 187struct ceph_msg_data_cursor {
 188        size_t                  total_resid;    /* across all data items */
 189
 190        struct ceph_msg_data    *data;          /* current data item */
 191        size_t                  resid;          /* bytes not yet consumed */
 192        bool                    last_piece;     /* current is last piece */
 193        bool                    need_crc;       /* crc update needed */
 194        union {
 195#ifdef CONFIG_BLOCK
 196                struct ceph_bio_iter    bio_iter;
 197#endif /* CONFIG_BLOCK */
 198                struct bvec_iter        bvec_iter;
 199                struct {                                /* pages */
 200                        unsigned int    page_offset;    /* offset in page */
 201                        unsigned short  page_index;     /* index in array */
 202                        unsigned short  page_count;     /* pages in array */
 203                };
 204                struct {                                /* pagelist */
 205                        struct page     *page;          /* page from list */
 206                        size_t          offset;         /* bytes from list */
 207                };
 208        };
 209};
 210
 211/*
 212 * a single message.  it contains a header (src, dest, message type, etc.),
 213 * footer (crc values, mainly), a "front" message body, and possibly a
 214 * data payload (stored in some number of pages).
 215 */
 216struct ceph_msg {
 217        struct ceph_msg_header hdr;     /* header */
 218        union {
 219                struct ceph_msg_footer footer;          /* footer */
 220                struct ceph_msg_footer_old old_footer;  /* old format footer */
 221        };
 222        struct kvec front;              /* unaligned blobs of message */
 223        struct ceph_buffer *middle;
 224
 225        size_t                          data_length;
 226        struct ceph_msg_data            *data;
 227        int                             num_data_items;
 228        int                             max_data_items;
 229        struct ceph_msg_data_cursor     cursor;
 230
 231        struct ceph_connection *con;
 232        struct list_head list_head;     /* links for connection lists */
 233
 234        struct kref kref;
 235        bool more_to_follow;
 236        bool needs_out_seq;
 237        int front_alloc_len;
 238        unsigned long ack_stamp;        /* tx: when we were acked */
 239
 240        struct ceph_msgpool *pool;
 241};
 242
 243/* ceph connection fault delay defaults, for exponential backoff */
 244#define BASE_DELAY_INTERVAL     (HZ/2)
 245#define MAX_DELAY_INTERVAL      (5 * 60 * HZ)
 246
 247/*
 248 * A single connection with another host.
 249 *
 250 * We maintain a queue of outgoing messages, and some session state to
 251 * ensure that we can preserve the lossless, ordered delivery of
 252 * messages in the case of a TCP disconnect.
 253 */
 254struct ceph_connection {
 255        void *private;
 256
 257        const struct ceph_connection_operations *ops;
 258
 259        struct ceph_messenger *msgr;
 260
 261        atomic_t sock_state;
 262        struct socket *sock;
 263        struct ceph_entity_addr peer_addr; /* peer address */
 264        struct ceph_entity_addr peer_addr_for_me;
 265
 266        unsigned long flags;
 267        unsigned long state;
 268        const char *error_msg;  /* error message, if any */
 269
 270        struct ceph_entity_name peer_name; /* peer name */
 271
 272        u64 peer_features;
 273        u32 connect_seq;      /* identify the most recent connection
 274                                 attempt for this connection, client */
 275        u32 peer_global_seq;  /* peer's global seq for this connection */
 276
 277        struct ceph_auth_handshake *auth;
 278        int auth_retry;       /* true if we need a newer authorizer */
 279
 280        struct mutex mutex;
 281
 282        /* out queue */
 283        struct list_head out_queue;
 284        struct list_head out_sent;   /* sending or sent but unacked */
 285        u64 out_seq;                 /* last message queued for send */
 286
 287        u64 in_seq, in_seq_acked;  /* last message received, acked */
 288
 289        /* connection negotiation temps */
 290        char in_banner[CEPH_BANNER_MAX_LEN];
 291        struct ceph_msg_connect out_connect;
 292        struct ceph_msg_connect_reply in_reply;
 293        struct ceph_entity_addr actual_peer_addr;
 294
 295        /* message out temps */
 296        struct ceph_msg_header out_hdr;
 297        struct ceph_msg *out_msg;        /* sending message (== tail of
 298                                            out_sent) */
 299        bool out_msg_done;
 300
 301        struct kvec out_kvec[8],         /* sending header/footer data */
 302                *out_kvec_cur;
 303        int out_kvec_left;   /* kvec's left in out_kvec */
 304        int out_skip;        /* skip this many bytes */
 305        int out_kvec_bytes;  /* total bytes left */
 306        int out_more;        /* there is more data after the kvecs */
 307        __le64 out_temp_ack; /* for writing an ack */
 308        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
 309                                                     stamp */
 310
 311        /* message in temps */
 312        struct ceph_msg_header in_hdr;
 313        struct ceph_msg *in_msg;
 314        u32 in_front_crc, in_middle_crc, in_data_crc;  /* calculated crc */
 315
 316        char in_tag;         /* protocol control byte */
 317        int in_base_pos;     /* bytes read */
 318        __le64 in_temp_ack;  /* for reading an ack */
 319
 320        struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 321
 322        struct delayed_work work;           /* send|recv work */
 323        unsigned long       delay;          /* current delay interval */
 324};
 325
 326
 327extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
 328
 329extern int ceph_parse_ips(const char *c, const char *end,
 330                          struct ceph_entity_addr *addr,
 331                          int max_count, int *count);
 332
 333
 334extern int ceph_msgr_init(void);
 335extern void ceph_msgr_exit(void);
 336extern void ceph_msgr_flush(void);
 337
 338extern void ceph_messenger_init(struct ceph_messenger *msgr,
 339                                struct ceph_entity_addr *myaddr);
 340extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 341extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
 342
 343extern void ceph_con_init(struct ceph_connection *con, void *private,
 344                        const struct ceph_connection_operations *ops,
 345                        struct ceph_messenger *msgr);
 346extern void ceph_con_open(struct ceph_connection *con,
 347                          __u8 entity_type, __u64 entity_num,
 348                          struct ceph_entity_addr *addr);
 349extern bool ceph_con_opened(struct ceph_connection *con);
 350extern void ceph_con_close(struct ceph_connection *con);
 351extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
 352
 353extern void ceph_msg_revoke(struct ceph_msg *msg);
 354extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
 355
 356extern void ceph_con_keepalive(struct ceph_connection *con);
 357extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
 358                                       unsigned long interval);
 359
 360void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 361                             size_t length, size_t alignment, bool own_pages);
 362extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 363                                struct ceph_pagelist *pagelist);
 364#ifdef CONFIG_BLOCK
 365void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 366                           u32 length);
 367#endif /* CONFIG_BLOCK */
 368void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 369                             struct ceph_bvec_iter *bvec_pos);
 370
 371struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
 372                               gfp_t flags, bool can_fail);
 373extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 374                                     bool can_fail);
 375
 376extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg);
 377extern void ceph_msg_put(struct ceph_msg *msg);
 378
 379extern void ceph_msg_dump(struct ceph_msg *msg);
 380
 381#endif
 382