linux/include/linux/ceph/messenger.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef __FS_CEPH_MESSENGER_H
   3#define __FS_CEPH_MESSENGER_H
   4
   5#include <linux/bvec.h>
   6#include <linux/kref.h>
   7#include <linux/mutex.h>
   8#include <linux/net.h>
   9#include <linux/radix-tree.h>
  10#include <linux/uio.h>
  11#include <linux/workqueue.h>
  12#include <net/net_namespace.h>
  13
  14#include <linux/ceph/types.h>
  15#include <linux/ceph/buffer.h>
  16
  17struct ceph_msg;
  18struct ceph_connection;
  19
  20/*
  21 * Ceph defines these callbacks for handling connection events.
     *
     * This is a table of function pointers.  ceph_con_init() takes a pointer
     * to such a table, and struct ceph_connection has an ->ops member of this
     * type.
  22 */
  23struct ceph_connection_operations {
            /* NOTE(review): presumably take/drop a reference on whatever
             * object owns the connection -- confirm with the implementations. */
  24        struct ceph_connection *(*get)(struct ceph_connection *);
  25        void (*put)(struct ceph_connection *);
  26
  27        /* handle an incoming message. */
  28        void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);
  29
  30        /* authorize an outgoing connection */
  31        struct ceph_auth_handshake *(*get_authorizer) (
  32                                struct ceph_connection *con,
  33                               int *proto, int force_new);
  34        int (*add_authorizer_challenge)(struct ceph_connection *con,
  35                                        void *challenge_buf,
  36                                        int challenge_buf_len);
  37        int (*verify_authorizer_reply) (struct ceph_connection *con);
  38        int (*invalidate_authorizer)(struct ceph_connection *con);
  39
  40        /* there was some error on the socket (disconnect, whatever) */
  41        void (*fault) (struct ceph_connection *con);
  42
  43        /* a remote host has terminated a message exchange session, and messages
  44         * we sent (or they tried to send us) may be lost. */
  45        void (*peer_reset) (struct ceph_connection *con);
  46
            /* returns a message for the incoming header @hdr; @skip is an
             * int out-parameter.  NOTE(review): presumably set non-zero to
             * ask the messenger to skip the message -- confirm. */
  47        struct ceph_msg * (*alloc_msg) (struct ceph_connection *con,
  48                                        struct ceph_msg_header *hdr,
  49                                        int *skip);
  50
  51        void (*reencode_message) (struct ceph_msg *msg);
  52
            /* message signing hooks; both return an int status */
  53        int (*sign_message) (struct ceph_msg *msg);
  54        int (*check_message_signature) (struct ceph_msg *msg);
  55};
  56
  57/*
     * Expands to TWO comma-separated arguments for a printf-style call: the
     * entity type name (via ceph_entity_type_name()) and the entity number
     * (via le64_to_cpu()).  The original note reads "use format string %s%d".
     * NOTE(review): the number comes out of le64_to_cpu(), so a 64-bit
     * conversion (e.g. %lld) is presumably what callers need -- confirm.
     * @n is expanded twice, so it should be free of side effects.
     */
  58#define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num)
  59
  /*
   * Per-messenger state: the local entity name and address, a stopping
   * flag, a network-namespace handle and the global connection sequence
   * counter.
   */
  60struct ceph_messenger {
  61        struct ceph_entity_inst inst;    /* my name+address */
            /* NOTE(review): presumably an encoded (wire-format) copy of my
             * address -- confirm against the users of this field. */
  62        struct ceph_entity_addr my_enc_addr;
  63
  64        atomic_t stopping;
  65        possible_net_t net;
  66
  67        /*
  68         * the global_seq counts connections i (attempt to) initiate
  69         * in order to disambiguate certain connect race conditions.
  70         */
  71        u32 global_seq;
  72        spinlock_t global_seq_lock;     /* presumably guards global_seq -- confirm */
  73};
  74
  /*
   * Kind of payload described by a struct ceph_msg_data (its ->type member).
   *
   * CEPH_MSG_DATA_BIO exists only when CONFIG_BLOCK is set, so the numeric
   * value of CEPH_MSG_DATA_BVECS differs between configurations; compare
   * against the enumerators rather than raw numbers.
   */
  75enum ceph_msg_data_type {
  76        CEPH_MSG_DATA_NONE,     /* message contains no data payload */
  77        CEPH_MSG_DATA_PAGES,    /* data source/destination is a page array */
  78        CEPH_MSG_DATA_PAGELIST, /* data source/destination is a pagelist */
  79#ifdef CONFIG_BLOCK
  80        CEPH_MSG_DATA_BIO,      /* data source/destination is a bio list */
  81#endif /* CONFIG_BLOCK */
  82        CEPH_MSG_DATA_BVECS,    /* data source/destination is a bio_vec array */
  83};
  84
  85#ifdef CONFIG_BLOCK
  86
  /*
   * Position inside a chain of bios: the bio currently being walked and a
   * bvec_iter within it.  The advance macros in this header follow
   * ->bi_next to move on to the next bio once the current one is used up.
   */
  87struct ceph_bio_iter {
  88        struct bio *bio;
  89        struct bvec_iter iter;
  90};
  91
  /*
   * Advance @it (pointer to struct ceph_bio_iter) by @n bytes, one chunk at
   * a time.  A chunk (__cur_n) is the smaller of what is left in the current
   * bio's iterator and what is left of @n.  STEP is evaluated once per chunk,
   * before the iterator is advanced, and may refer to __cur_n.  When the
   * current bio is used up and has a ->bi_next, iteration continues there,
   * restarting from that bio's own ->bi_iter.
   *
   * BUG()s if bytes remain to be consumed but the iterator is empty.
   * @n is evaluated once (copied into __n); @it is expanded several times,
   * so it should be free of side effects.  dout() is not defined in this
   * file itself and must be visible where the macro is used.
   */
  92#define __ceph_bio_iter_advance_step(it, n, STEP) do {                        \
  93        unsigned int __n = (n), __cur_n;                                      \
  94                                                                              \
  95        while (__n) {                                                         \
  96                BUG_ON(!(it)->iter.bi_size);                                  \
  97                __cur_n = min((it)->iter.bi_size, __n);                       \
  98                (void)(STEP);                                                 \
  99                bio_advance_iter((it)->bio, &(it)->iter, __cur_n);            \
 100                if (!(it)->iter.bi_size && (it)->bio->bi_next) {              \
 101                        dout("__ceph_bio_iter_advance_step next bio\n");      \
 102                        (it)->bio = (it)->bio->bi_next;                       \
 103                        (it)->iter = (it)->bio->bi_iter;                      \
 104                }                                                             \
 105                __n -= __cur_n;                                               \
 106        }                                                                     \
 107} while (0)
 108
 109/*
 110 * Advance @it by @n bytes.
     *
     * Thin wrapper around __ceph_bio_iter_advance_step() with a no-op step
     * (the constant 0), so only the iterator position changes.
 111 */
 112#define ceph_bio_iter_advance(it, n)                                          \
 113        __ceph_bio_iter_advance_step(it, n, 0)
 114
 115/*
 116 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
     *
     * BVEC_STEP is evaluated with a local "struct bio_vec bv" in scope that
     * holds the current segment; the name "bv" is therefore part of this
     * macro's interface.  For every chunk handed out by
     * __ceph_bio_iter_advance_step(), a private copy of the iterator is
     * limited to that chunk's size (__cur_n) and walked with
     * __bio_for_each_segment(), so @it itself is only moved by the outer
     * macro.  Relies on the GNU statement-expression extension "({ ... })".
 117 */
 118#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)                          \
 119        __ceph_bio_iter_advance_step(it, n, ({                                \
 120                struct bio_vec bv;                                            \
 121                struct bvec_iter __cur_iter;                                  \
 122                                                                              \
 123                __cur_iter = (it)->iter;                                      \
 124                __cur_iter.bi_size = __cur_n;                                 \
 125                __bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
 126                        (void)(BVEC_STEP);                                    \
 127        }))
 128
 129#endif /* CONFIG_BLOCK */
 130
 /*
  * Position inside a bio_vec array: the array itself plus a bvec_iter that
  * the ceph_bvec_iter_*() macros in this header advance or shorten.
  */
 131struct ceph_bvec_iter {
 132        struct bio_vec *bvecs;
 133        struct bvec_iter iter;
 134};
 135
 /*
  * Advance @it (pointer to struct ceph_bvec_iter) by @n bytes.  STEP is
  * evaluated exactly once, before the iterator is advanced.
  *
  * BUG()s if @n exceeds the bytes left in the iterator.  @n and @it are each
  * expanded more than once here, so both should be free of side effects.
  */
 136#define __ceph_bvec_iter_advance_step(it, n, STEP) do {                       \
 137        BUG_ON((n) > (it)->iter.bi_size);                                     \
 138        (void)(STEP);                                                         \
 139        bvec_iter_advance((it)->bvecs, &(it)->iter, (n));                     \
 140} while (0)
 141
 142/*
 143 * Advance @it by @n bytes.
     *
     * Thin wrapper around __ceph_bvec_iter_advance_step() with a no-op step
     * (the constant 0), so only the iterator position changes.
 144 */
 145#define ceph_bvec_iter_advance(it, n)                                         \
 146        __ceph_bvec_iter_advance_step(it, n, 0)
 147
 148/*
 149 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
     *
     * BVEC_STEP is evaluated with a local "struct bio_vec bv" in scope that
     * holds the current segment; the name "bv" is therefore part of this
     * macro's interface.  A private copy of the iterator, limited to @n
     * bytes, is walked with for_each_bvec() before @it itself is advanced by
     * __ceph_bvec_iter_advance_step().  @n is expanded once more here in
     * addition to the expansions in that macro, so it should be free of side
     * effects.  Relies on the GNU statement-expression extension "({ ... })".
 150 */
 151#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)                         \
 152        __ceph_bvec_iter_advance_step(it, n, ({                               \
 153                struct bio_vec bv;                                            \
 154                struct bvec_iter __cur_iter;                                  \
 155                                                                              \
 156                __cur_iter = (it)->iter;                                      \
 157                __cur_iter.bi_size = (n);                                     \
 158                for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)        \
 159                        (void)(BVEC_STEP);                                    \
 160        }))
 161
 /*
  * Truncate @it (pointer to struct ceph_bvec_iter) so that exactly @n bytes
  * remain, by overwriting ->iter.bi_size.  It can only shrink: BUG()s if @n
  * is larger than the current size.  @n and @it are each expanded twice, so
  * both should be free of side effects.
  */
 162#define ceph_bvec_iter_shorten(it, n) do {                                    \
 163        BUG_ON((n) > (it)->iter.bi_size);                                     \
 164        (it)->iter.bi_size = (n);                                             \
 165} while (0)
 166
 /*
  * One data item of a message payload.  ->type says what kind of payload
  * this is and the anonymous union holds its description.
  * NOTE(review): the pairing of enum value to union member noted below is
  * inferred from the member names and the enum's comments -- confirm.
  */
 167struct ceph_msg_data {
 168        enum ceph_msg_data_type         type;
 169        union {
 170#ifdef CONFIG_BLOCK
 171                struct {                        /* presumably CEPH_MSG_DATA_BIO */
 172                        struct ceph_bio_iter    bio_pos;
 173                        u32                     bio_length;
 174                };
 175#endif /* CONFIG_BLOCK */
 176                struct ceph_bvec_iter   bvec_pos;       /* presumably CEPH_MSG_DATA_BVECS */
 177                struct {                        /* presumably CEPH_MSG_DATA_PAGES */
 178                        struct page     **pages;        /* NOT OWNER. */
 179                        size_t          length;         /* total # bytes */
 180                        unsigned int    alignment;      /* first page */
 181                };
 182                struct ceph_pagelist    *pagelist;      /* presumably CEPH_MSG_DATA_PAGELIST */
 183        };
 184};
 185
 /*
  * Cursor over the data items of a message: overall and per-item byte
  * counters, a pointer to the current item, and a union with per-data-type
  * position state.  The bio member of the union exists only with
  * CONFIG_BLOCK.
  */
 186struct ceph_msg_data_cursor {
 187        size_t                  total_resid;    /* across all data items */
 188
 189        struct ceph_msg_data    *data;          /* current data item */
 190        size_t                  resid;          /* bytes not yet consumed */
 191        bool                    last_piece;     /* current is last piece */
 192        bool                    need_crc;       /* crc update needed */
 193        union {
 194#ifdef CONFIG_BLOCK
 195                struct ceph_bio_iter    bio_iter;
 196#endif /* CONFIG_BLOCK */
 197                struct bvec_iter        bvec_iter;
                    /* page_index/page_count are unsigned short, which caps
                     * the page counts they can represent at USHRT_MAX */
 198                struct {                                /* pages */
 199                        unsigned int    page_offset;    /* offset in page */
 200                        unsigned short  page_index;     /* index in array */
 201                        unsigned short  page_count;     /* pages in array */
 202                };
 203                struct {                                /* pagelist */
 204                        struct page     *page;          /* page from list */
 205                        size_t          offset;         /* bytes from list */
 206                };
 207        };
 208};
 209
 210/*
 211 * a single message.  it contains a header (src, dest, message type, etc.),
 212 * footer (crc values, mainly), a "front" message body, and possibly a
 213 * data payload (stored in some number of pages).
     *
     * The footer is kept in a union of the current and the old footer layout,
     * so only one of the two formats is meaningful for a given message.
 214 */
 215struct ceph_msg {
 216        struct ceph_msg_header hdr;     /* header */
 217        union {
 218                struct ceph_msg_footer footer;          /* footer */
 219                struct ceph_msg_footer_old old_footer;  /* old format footer */
 220        };
 221        struct kvec front;              /* unaligned blobs of message */
 222        struct ceph_buffer *middle;
 223
            /* data payload bookkeeping.  NOTE(review): ->data presumably
             * points to an array of max_data_items entries, of which
             * num_data_items are in use -- confirm with the allocator. */
 224        size_t                          data_length;
 225        struct ceph_msg_data            *data;
 226        int                             num_data_items;
 227        int                             max_data_items;
 228        struct ceph_msg_data_cursor     cursor;
 229
 230        struct ceph_connection *con;
 231        struct list_head list_head;     /* links for connection lists */
 232
            /* reference count; presumably managed through ceph_msg_get() and
             * ceph_msg_put() -- confirm */
 233        struct kref kref;
 234        bool more_to_follow;
 235        bool needs_out_seq;
 236        int front_alloc_len;
 237        unsigned long ack_stamp;        /* tx: when we were acked */
 238
 239        struct ceph_msgpool *pool;
 240};
 241
 242/* ceph connection fault delay defaults, for exponential backoff.
     * Both values are expressed in jiffies as multiples of HZ: half a second
     * for the base interval and five minutes for the maximum. */
 243#define BASE_DELAY_INTERVAL     (HZ/2)
 244#define MAX_DELAY_INTERVAL      (5 * 60 * HZ)
 245
 246/*
 247 * A single connection with another host.
 248 *
 249 * We maintain a queue of outgoing messages, and some session state to
 250 * ensure that we can preserve the lossless, ordered delivery of
 251 * messages in the case of a TCP disconnect.
     *
     * The structure groups, in order: owner data and callbacks, socket and
     * peer identity, negotiation/authentication state, the outgoing queues
     * and sequence numbers, scratch space for the message being written,
     * scratch space for the message being read, and the delayed work item
     * with its current delay.
 252 */
 253struct ceph_connection {
            /* opaque owner pointer; ceph_con_init() takes a matching
             * "void *private" argument */
 254        void *private;
 255
 256        const struct ceph_connection_operations *ops;
 257
 258        struct ceph_messenger *msgr;
 259
 260        atomic_t sock_state;
 261        struct socket *sock;
 262        struct ceph_entity_addr peer_addr; /* peer address */
 263        struct ceph_entity_addr peer_addr_for_me;
 264
 265        unsigned long flags;
 266        unsigned long state;
 267        const char *error_msg;  /* error message, if any */
 268
 269        struct ceph_entity_name peer_name; /* peer name */
 270
 271        u64 peer_features;
 272        u32 connect_seq;      /* identify the most recent connection
 273                                 attempt for this connection, client */
 274        u32 peer_global_seq;  /* peer's global seq for this connection */
 275
 276        struct ceph_auth_handshake *auth;
 277        int auth_retry;       /* true if we need a newer authorizer */
 278
            /* NOTE(review): presumably serializes access to this
             * connection's state -- confirm which fields it covers */
 279        struct mutex mutex;
 280
 281        /* out queue */
 282        struct list_head out_queue;
 283        struct list_head out_sent;   /* sending or sent but unacked */
 284        u64 out_seq;                 /* last message queued for send */
 285
 286        u64 in_seq, in_seq_acked;  /* last message received, acked */
 287
 288        /* connection negotiation temps */
 289        char in_banner[CEPH_BANNER_MAX_LEN];
 290        struct ceph_msg_connect out_connect;
 291        struct ceph_msg_connect_reply in_reply;
 292        struct ceph_entity_addr actual_peer_addr;
 293
 294        /* message out temps */
 295        struct ceph_msg_header out_hdr;
 296        struct ceph_msg *out_msg;        /* sending message (== tail of
 297                                            out_sent) */
 298        bool out_msg_done;
 299
            /* fixed array of eight kvecs plus a pointer to the current one */
 300        struct kvec out_kvec[8],         /* sending header/footer data */
 301                *out_kvec_cur;
 302        int out_kvec_left;   /* kvec's left in out_kvec */
 303        int out_skip;        /* skip this many bytes */
 304        int out_kvec_bytes;  /* total bytes left */
 305        int out_more;        /* there is more data after the kvecs */
 306        __le64 out_temp_ack; /* for writing an ack */
 307        struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
 308                                                     stamp */
 309
 310        /* message in temps */
 311        struct ceph_msg_header in_hdr;
 312        struct ceph_msg *in_msg;
 313        u32 in_front_crc, in_middle_crc, in_data_crc;  /* calculated crc */
 314
 315        char in_tag;         /* protocol control byte */
 316        int in_base_pos;     /* bytes read */
 317        __le64 in_temp_ack;  /* for reading an ack */
 318
 319        struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
 320
 321        struct delayed_work work;           /* send|recv work */
 322        unsigned long       delay;          /* current delay interval */
 323};
 324
 325
 /*
  * Address helpers.  ceph_pr_addr() returns a string for @addr;
  * ceph_parse_ips() reads from the range [@c, @end) into @addr, given room
  * for @max_count entries, and reports a count through @count.
  * NOTE(review): ownership/lifetime of the ceph_pr_addr() result is not
  * visible here -- confirm before holding on to it.
  */
 326extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
 327
 328extern int ceph_parse_ips(const char *c, const char *end,
 329                          struct ceph_entity_addr *addr,
 330                          int max_count, int *count);
 331
 332
     /* messenger-wide setup, teardown and flush (no per-instance argument) */
 333extern int ceph_msgr_init(void);
 334extern void ceph_msgr_exit(void);
 335extern void ceph_msgr_flush(void);
 336
     /* per-messenger init/fini for a caller-provided struct ceph_messenger */
 337extern void ceph_messenger_init(struct ceph_messenger *msgr,
 338                                struct ceph_entity_addr *myaddr);
 339extern void ceph_messenger_fini(struct ceph_messenger *msgr);
 340
     /* connection lifecycle and sending */
 341extern void ceph_con_init(struct ceph_connection *con, void *private,
 342                        const struct ceph_connection_operations *ops,
 343                        struct ceph_messenger *msgr);
 344extern void ceph_con_open(struct ceph_connection *con,
 345                          __u8 entity_type, __u64 entity_num,
 346                          struct ceph_entity_addr *addr);
 347extern bool ceph_con_opened(struct ceph_connection *con);
 348extern void ceph_con_close(struct ceph_connection *con);
 349extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
 350
     /* NOTE(review): presumably withdraw an outgoing / incoming message from
      * its connection -- confirm against the implementation */
 351extern void ceph_msg_revoke(struct ceph_msg *msg);
 352extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
 353
 354extern void ceph_con_keepalive(struct ceph_connection *con);
 355extern bool ceph_con_keepalive_expired(struct ceph_connection *con,
 356                                       unsigned long interval);
 357
     /*
      * Attach a data payload description to @msg, one helper per
      * enum ceph_msg_data_type kind.  The bio variant is declared only with
      * CONFIG_BLOCK.
      * NOTE(review): ceph_msg_data_add_pages() takes @alignment as size_t
      * while struct ceph_msg_data stores alignment as unsigned int.
      */
 358extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 359                                size_t length, size_t alignment);
 360extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 361                                struct ceph_pagelist *pagelist);
 362#ifdef CONFIG_BLOCK
 363void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 364                           u32 length);
 365#endif /* CONFIG_BLOCK */
 366void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 367                             struct ceph_bvec_iter *bvec_pos);
 368
     /* message allocation; ceph_msg_new2() additionally takes the maximum
      * number of data items */
 369struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
 370                               gfp_t flags, bool can_fail);
 371extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 372                                     bool can_fail);
 373
     /* NOTE(review): presumably take / drop a reference on @msg (struct
      * ceph_msg embeds a kref) -- confirm */
 374extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg);
 375extern void ceph_msg_put(struct ceph_msg *msg);
 376
 377extern void ceph_msg_dump(struct ceph_msg *msg);
 378
 379#endif
 380