/* SPDX-License-Identifier: GPL-2.0 */
/* linux/drivers/block/drbd/drbd_protocol.h */
   2#ifndef __DRBD_PROTOCOL_H
   3#define __DRBD_PROTOCOL_H
   4
   /*
    * Packet identifiers used on the wire.
    *
    * The values are grouped the way the comments below group them: packets
    * for the data socket, packets for the meta socket, later additions that
    * no longer fit that ordering, and special ids for the handshake.
    * Every value is explicit; gaps are reserved as noted inline.
    */
   enum drbd_packet {
           /* receiver (data socket) */
           P_DATA                = 0x00,
           P_DATA_REPLY          = 0x01, /* Response to P_DATA_REQUEST */
           P_RS_DATA_REPLY       = 0x02, /* Response to P_RS_DATA_REQUEST */
           P_BARRIER             = 0x03,
           P_BITMAP              = 0x04,
           P_BECOME_SYNC_TARGET  = 0x05,
           P_BECOME_SYNC_SOURCE  = 0x06,
           P_UNPLUG_REMOTE       = 0x07, /* Used at various times to hint the peer */
           P_DATA_REQUEST        = 0x08, /* Used to ask for a data block */
           P_RS_DATA_REQUEST     = 0x09, /* Used to ask for a data block for resync */
           P_SYNC_PARAM          = 0x0a,
           P_PROTOCOL            = 0x0b,
           P_UUIDS               = 0x0c,
           P_SIZES               = 0x0d,
           P_STATE               = 0x0e,
           P_SYNC_UUID           = 0x0f,
           P_AUTH_CHALLENGE      = 0x10,
           P_AUTH_RESPONSE       = 0x11,
           P_STATE_CHG_REQ       = 0x12,

           /* (meta socket) */
           P_PING                = 0x13,
           P_PING_ACK            = 0x14,
           P_RECV_ACK            = 0x15, /* Used in protocol B */
           P_WRITE_ACK           = 0x16, /* Used in protocol C */
           P_RS_WRITE_ACK        = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
           P_SUPERSEDED          = 0x18, /* Used in proto C, two-primaries conflict detection */
           P_NEG_ACK             = 0x19, /* Sent if local disk is unusable */
           P_NEG_DREPLY          = 0x1a, /* Local disk is broken... */
           P_NEG_RS_DREPLY       = 0x1b, /* Local disk is broken... */
           P_BARRIER_ACK         = 0x1c,
           P_STATE_CHG_REPLY     = 0x1d,

           /* "new" commands, no longer fitting into the ordering scheme above */

           P_OV_REQUEST          = 0x1e, /* data socket */
           P_OV_REPLY            = 0x1f,
           P_OV_RESULT           = 0x20, /* meta socket */
           P_CSUM_RS_REQUEST     = 0x21, /* data socket */
           P_RS_IS_IN_SYNC       = 0x22, /* meta socket */
           P_SYNC_PARAM89        = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
           P_COMPRESSED_BITMAP   = 0x24, /* compressed or otherwise encoded bitmap transfer */
           /* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
           /* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
           P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
           P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
           P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
           P_CONN_ST_CHG_REQ     = 0x2a, /* data sock: Connection wide state request */
           P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
           P_RETRY_WRITE         = 0x2c, /* Protocol C: retry conflicting write request */
           P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
           /* 0x2e to 0x30 reserved, used in drbd 9 */

           /* REQ_OP_DISCARD. We used "discard" in different contexts before,
            * which is why I chose TRIM here, to disambiguate. */
           P_TRIM                = 0x31,

           /* Only use these two if both support FF_THIN_RESYNC */
           P_RS_THIN_REQ         = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */
           P_RS_DEALLOCATED      = 0x33, /* Contains only zeros on sync source node */

           /* REQ_WRITE_SAME.
            * On a receiving side without REQ_WRITE_SAME,
            * we may fall back to an opencoded loop instead. */
           P_WSAME               = 0x34,

           P_MAY_IGNORE          = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
           P_MAX_OPT_CMD         = 0x101,

           /* special command ids for handshake */

           P_INITIAL_META        = 0xfff1, /* First Packet on the MetaSock */
           P_INITIAL_DATA        = 0xfff2, /* First Packet on the Socket */

           P_CONNECTION_FEATURES = 0xfffe  /* FIXED for the next century! */
   };
  83
  /* Provide __packed for environments that have not already defined it. */
  #ifndef __packed
  #define __packed __attribute__((packed))
  #endif
  87
  88/* This is the layout for a packet on the wire.
  89 * The byteorder is the network byte order.
  90 *     (except block_id and barrier fields.
  91 *      these are pointers to local structs
  92 *      and have no relevance for the partner,
  93 *      which just echoes them as received.)
  94 *
  95 * NOTE that the payload starts at a long aligned offset,
  96 * regardless of 32 or 64 bit arch!
  97 */
  98struct p_header80 {
  99        u32       magic;
 100        u16       command;
 101        u16       length;       /* bytes of data after this header */
 102} __packed;
 103
 104/* Header for big packets, Used for data packets exceeding 64kB */
 105struct p_header95 {
 106        u16       magic;        /* use DRBD_MAGIC_BIG here */
 107        u16       command;
 108        u32       length;
 109} __packed;
 110
 111struct p_header100 {
 112        u32       magic;
 113        u16       volume;
 114        u16       command;
 115        u32       length;
 116        u32       pad;
 117} __packed;
 118
 /*
  * These defines must not be changed without changing the protocol version.
  * New defines may only be introduced together with protocol version bump or
  * new protocol feature flags.
  *
  * Each value is a distinct single bit, so they can be OR-ed together.
  */
 #define DP_HARDBARRIER        1 /* no longer used */
 #define DP_RW_SYNC            2 /* equals REQ_SYNC    */
 #define DP_MAY_SET_IN_SYNC    4
 #define DP_UNPLUG             8 /* not used anymore   */
 #define DP_FUA               16 /* equals REQ_FUA     */
 #define DP_FLUSH             32 /* equals REQ_PREFLUSH   */
 #define DP_DISCARD           64 /* equals REQ_OP_DISCARD */
 #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
 #define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
 #define DP_WSAME            512 /* equiv. REQ_WRITE_SAME */
 133
 134struct p_data {
 135        u64         sector;    /* 64 bits sector number */
 136        u64         block_id;  /* to identify the request in protocol B&C */
 137        u32         seq_num;
 138        u32         dp_flags;
 139} __packed;
 140
 141struct p_trim {
 142        struct p_data p_data;
 143        u32         size;       /* == bio->bi_size */
 144} __packed;
 145
 146struct p_wsame {
 147        struct p_data p_data;
 148        u32           size;     /* == bio->bi_size */
 149} __packed;
 150
 151/*
 152 * commands which share a struct:
 153 *  p_block_ack:
 154 *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
 155 *   P_SUPERSEDED (proto C, two-primaries conflict detection)
 156 *  p_block_req:
 157 *   P_DATA_REQUEST, P_RS_DATA_REQUEST
 158 */
 159struct p_block_ack {
 160        u64         sector;
 161        u64         block_id;
 162        u32         blksize;
 163        u32         seq_num;
 164} __packed;
 165
 166struct p_block_req {
 167        u64 sector;
 168        u64 block_id;
 169        u32 blksize;
 170        u32 pad;        /* to multiple of 8 Byte */
 171} __packed;
 172
 /*
  * commands with their own struct for additional fields:
  *   P_CONNECTION_FEATURES
  *   P_BARRIER
  *   P_BARRIER_ACK
  *   P_SYNC_PARAM
  *   ReportParams
  */
 181
 /*
  * Feature flags. Each value is a distinct single bit.
  */

 /* supports TRIM/DISCARD on the "wire" protocol */
 #define DRBD_FF_TRIM 1

 /* Detect all-zeros during resync, and rather TRIM/UNMAP/DISCARD those blocks
  * instead of fully allocate a supposedly thin volume on initial resync */
 #define DRBD_FF_THIN_RESYNC 2

 /* supports REQ_WRITE_SAME on the "wire" protocol.
  * Note: this flag is overloaded,
  * its presence also
  *   - indicates support for 128 MiB "batch bios",
  *     max discard size of 128 MiB
  *     instead of 4M before that.
  *   - indicates that we exchange additional settings in p_sizes
  *     drbd_send_sizes()/receive_sizes()
  */
 #define DRBD_FF_WSAME 4
 199
 200struct p_connection_features {
 201        u32 protocol_min;
 202        u32 feature_flags;
 203        u32 protocol_max;
 204
 205        /* should be more than enough for future enhancements
 206         * for now, feature_flags and the reserved array shall be zero.
 207         */
 208
 209        u32 _pad;
 210        u64 reserved[7];
 211} __packed;
 212
 213struct p_barrier {
 214        u32 barrier;    /* barrier number _handle_ only */
 215        u32 pad;        /* to multiple of 8 Byte */
 216} __packed;
 217
 218struct p_barrier_ack {
 219        u32 barrier;
 220        u32 set_size;
 221} __packed;
 222
 223struct p_rs_param {
 224        u32 resync_rate;
 225
 226              /* Since protocol version 88 and higher. */
 227        char verify_alg[0];
 228} __packed;
 229
 230struct p_rs_param_89 {
 231        u32 resync_rate;
 232        /* protocol version 89: */
 233        char verify_alg[SHARED_SECRET_MAX];
 234        char csums_alg[SHARED_SECRET_MAX];
 235} __packed;
 236
 237struct p_rs_param_95 {
 238        u32 resync_rate;
 239        char verify_alg[SHARED_SECRET_MAX];
 240        char csums_alg[SHARED_SECRET_MAX];
 241        u32 c_plan_ahead;
 242        u32 c_delay_target;
 243        u32 c_fill_target;
 244        u32 c_max_rate;
 245} __packed;
 246
 /*
  * Connection flags; each value is a distinct single bit.
  * NOTE(review): presumably carried in p_protocol.conn_flags -- confirm.
  */
 enum drbd_conn_flags {
         CF_DISCARD_MY_DATA = 1,
         CF_DRY_RUN = 2,
 };
 251
 252struct p_protocol {
 253        u32 protocol;
 254        u32 after_sb_0p;
 255        u32 after_sb_1p;
 256        u32 after_sb_2p;
 257        u32 conn_flags;
 258        u32 two_primaries;
 259
 260        /* Since protocol version 87 and higher. */
 261        char integrity_alg[0];
 262
 263} __packed;
 264
 265struct p_uuids {
 266        u64 uuid[UI_EXTENDED_SIZE];
 267} __packed;
 268
 269struct p_rs_uuid {
 270        u64         uuid;
 271} __packed;
 272
 273/* optional queue_limits if (agreed_features & DRBD_FF_WSAME)
 274 * see also struct queue_limits, as of late 2015 */
 275struct o_qlim {
 276        /* we don't need it yet, but we may as well communicate it now */
 277        u32 physical_block_size;
 278
 279        /* so the original in struct queue_limits is unsigned short,
 280         * but I'd have to put in padding anyways. */
 281        u32 logical_block_size;
 282
 283        /* One incoming bio becomes one DRBD request,
 284         * which may be translated to several bio on the receiving side.
 285         * We don't need to communicate chunk/boundary/segment ... limits.
 286         */
 287
 288        /* various IO hints may be useful with "diskless client" setups */
 289        u32 alignment_offset;
 290        u32 io_min;
 291        u32 io_opt;
 292
 293        /* We may need to communicate integrity stuff at some point,
 294         * but let's not get ahead of ourselves. */
 295
 296        /* Backend discard capabilities.
 297         * Receiving side uses "blkdev_issue_discard()", no need to communicate
 298         * more specifics.  If the backend cannot do discards, the DRBD peer
 299         * may fall back to blkdev_issue_zeroout().
 300         */
 301        u8 discard_enabled;
 302        u8 discard_zeroes_data;
 303        u8 write_same_capable;
 304        u8 _pad;
 305} __packed;
 306
 307struct p_sizes {
 308        u64         d_size;  /* size of disk */
 309        u64         u_size;  /* user requested size */
 310        u64         c_size;  /* current exported size */
 311        u32         max_bio_size;  /* Maximal size of a BIO */
 312        u16         queue_order_type;  /* not yet implemented in DRBD*/
 313        u16         dds_flags; /* use enum dds_flags here. */
 314
 315        /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */
 316        struct o_qlim qlim[0];
 317} __packed;
 318
 319struct p_state {
 320        u32         state;
 321} __packed;
 322
 323struct p_req_state {
 324        u32         mask;
 325        u32         val;
 326} __packed;
 327
 328struct p_req_state_reply {
 329        u32         retcode;
 330} __packed;
 331
 332struct p_drbd06_param {
 333        u64       size;
 334        u32       state;
 335        u32       blksize;
 336        u32       protocol;
 337        u32       version;
 338        u32       gen_cnt[5];
 339        u32       bit_map_gen[5];
 340} __packed;
 341
 342struct p_block_desc {
 343        u64 sector;
 344        u32 blksize;
 345        u32 pad;        /* to multiple of 8 Byte */
 346} __packed;
 347
 /* Valid values for the encoding field.
  * Bump proto version when changing this. */
 enum drbd_bitmap_code {
         /* RLE_VLI_Bytes = 0,
          * and other bit variants had been defined during
          * algorithm evaluation. */
         RLE_VLI_Bits = 2,
 };
 356
 357struct p_compressed_bm {
 358        /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 359         * (encoding & 0x80): polarity (set/unset) of first runlength
 360         * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
 361         * used to pad up to head.length bytes
 362         */
 363        u8 encoding;
 364
 365        u8 code[0];
 366} __packed;
 367
 368struct p_delay_probe93 {
 369        u32     seq_num; /* sequence number to match the two probe packets */
 370        u32     offset;  /* usecs the probe got sent after the reference time point */
 371} __packed;
 372
 /*
  * Bitmap packets need to fit within a single page on the sender and receiver,
  * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
  */
 #define DRBD_SOCKET_BUFFER_SIZE 4096
 378
 379#endif  /* __DRBD_PROTOCOL_H */
 380