linux/drivers/block/drbd/drbd_protocol.h
<<
>>
Prefs
   1#ifndef __DRBD_PROTOCOL_H
   2#define __DRBD_PROTOCOL_H
   3
   4enum drbd_packet {
   5        /* receiver (data socket) */
   6        P_DATA                = 0x00,
   7        P_DATA_REPLY          = 0x01, /* Response to P_DATA_REQUEST */
   8        P_RS_DATA_REPLY       = 0x02, /* Response to P_RS_DATA_REQUEST */
   9        P_BARRIER             = 0x03,
  10        P_BITMAP              = 0x04,
  11        P_BECOME_SYNC_TARGET  = 0x05,
  12        P_BECOME_SYNC_SOURCE  = 0x06,
  13        P_UNPLUG_REMOTE       = 0x07, /* Used at various times to hint the peer */
  14        P_DATA_REQUEST        = 0x08, /* Used to ask for a data block */
  15        P_RS_DATA_REQUEST     = 0x09, /* Used to ask for a data block for resync */
  16        P_SYNC_PARAM          = 0x0a,
  17        P_PROTOCOL            = 0x0b,
  18        P_UUIDS               = 0x0c,
  19        P_SIZES               = 0x0d,
  20        P_STATE               = 0x0e,
  21        P_SYNC_UUID           = 0x0f,
  22        P_AUTH_CHALLENGE      = 0x10,
  23        P_AUTH_RESPONSE       = 0x11,
  24        P_STATE_CHG_REQ       = 0x12,
  25
  26        /* (meta socket) */
  27        P_PING                = 0x13,
  28        P_PING_ACK            = 0x14,
  29        P_RECV_ACK            = 0x15, /* Used in protocol B */
  30        P_WRITE_ACK           = 0x16, /* Used in protocol C */
  31        P_RS_WRITE_ACK        = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */
  32        P_SUPERSEDED          = 0x18, /* Used in proto C, two-primaries conflict detection */
  33        P_NEG_ACK             = 0x19, /* Sent if local disk is unusable */
  34        P_NEG_DREPLY          = 0x1a, /* Local disk is broken... */
  35        P_NEG_RS_DREPLY       = 0x1b, /* Local disk is broken... */
  36        P_BARRIER_ACK         = 0x1c,
  37        P_STATE_CHG_REPLY     = 0x1d,
  38
  39        /* "new" commands, no longer fitting into the ordering scheme above */
  40
  41        P_OV_REQUEST          = 0x1e, /* data socket */
  42        P_OV_REPLY            = 0x1f,
  43        P_OV_RESULT           = 0x20, /* meta socket */
  44        P_CSUM_RS_REQUEST     = 0x21, /* data socket */
  45        P_RS_IS_IN_SYNC       = 0x22, /* meta socket */
  46        P_SYNC_PARAM89        = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */
  47        P_COMPRESSED_BITMAP   = 0x24, /* compressed or otherwise encoded bitmap transfer */
  48        /* P_CKPT_FENCE_REQ      = 0x25, * currently reserved for protocol D */
  49        /* P_CKPT_DISABLE_REQ    = 0x26, * currently reserved for protocol D */
  50        P_DELAY_PROBE         = 0x27, /* is used on BOTH sockets */
  51        P_OUT_OF_SYNC         = 0x28, /* Mark as out of sync (Outrunning), data socket */
  52        P_RS_CANCEL           = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */
  53        P_CONN_ST_CHG_REQ     = 0x2a, /* data sock: Connection wide state request */
  54        P_CONN_ST_CHG_REPLY   = 0x2b, /* meta sock: Connection side state req reply */
  55        P_RETRY_WRITE         = 0x2c, /* Protocol C: retry conflicting write request */
  56        P_PROTOCOL_UPDATE     = 0x2d, /* data sock: is used in established connections */
  57        /* 0x2e to 0x30 reserved, used in drbd 9 */
  58
  59        /* REQ_DISCARD. We used "discard" in different contexts before,
  60         * which is why I chose TRIM here, to disambiguate. */
  61        P_TRIM                = 0x31,
  62
  63        /* Only use these two if both support FF_THIN_RESYNC */
  64        P_RS_THIN_REQ         = 0x32, /* Request a block for resync or reply P_RS_DEALLOCATED */
  65        P_RS_DEALLOCATED      = 0x33, /* Contains only zeros on sync source node */
  66
  67        /* REQ_WRITE_SAME.
  68         * On a receiving side without REQ_WRITE_SAME,
  69         * we may fall back to an opencoded loop instead. */
  70        P_WSAME               = 0x34,
  71
  72        P_MAY_IGNORE          = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
  73        P_MAX_OPT_CMD         = 0x101,
  74
  75        /* special command ids for handshake */
  76
  77        P_INITIAL_META        = 0xfff1, /* First Packet on the MetaSock */
  78        P_INITIAL_DATA        = 0xfff2, /* First Packet on the Socket */
  79
  80        P_CONNECTION_FEATURES = 0xfffe  /* FIXED for the next century! */
  81};
  82
  83#ifndef __packed
  84#define __packed __attribute__((packed))
  85#endif
  86
  87/* This is the layout for a packet on the wire.
  88 * The byteorder is the network byte order.
  89 *     (except block_id and barrier fields.
  90 *      these are pointers to local structs
  91 *      and have no relevance for the partner,
  92 *      which just echoes them as received.)
  93 *
  94 * NOTE that the payload starts at a long aligned offset,
  95 * regardless of 32 or 64 bit arch!
  96 */
  97struct p_header80 {
  98        u32       magic;
  99        u16       command;
 100        u16       length;       /* bytes of data after this header */
 101} __packed;
 102
 103/* Header for big packets, Used for data packets exceeding 64kB */
 104struct p_header95 {
 105        u16       magic;        /* use DRBD_MAGIC_BIG here */
 106        u16       command;
 107        u32       length;
 108} __packed;
 109
 110struct p_header100 {
 111        u32       magic;
 112        u16       volume;
 113        u16       command;
 114        u32       length;
 115        u32       pad;
 116} __packed;
 117
 118/* These defines must not be changed without changing the protocol version.
 119 * New defines may only be introduced together with protocol version bump or
 120 * new protocol feature flags.
 121 */
 122#define DP_HARDBARRIER        1 /* no longer used */
 123#define DP_RW_SYNC            2 /* equals REQ_SYNC    */
 124#define DP_MAY_SET_IN_SYNC    4
 125#define DP_UNPLUG             8 /* not used anymore   */
 126#define DP_FUA               16 /* equals REQ_FUA     */
 127#define DP_FLUSH             32 /* equals REQ_PREFLUSH   */
 128#define DP_DISCARD           64 /* equals REQ_DISCARD */
 129#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
 130#define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
 131#define DP_WSAME            512 /* equiv. REQ_WRITE_SAME */
 132
 133struct p_data {
 134        u64         sector;    /* 64 bits sector number */
 135        u64         block_id;  /* to identify the request in protocol B&C */
 136        u32         seq_num;
 137        u32         dp_flags;
 138} __packed;
 139
 140struct p_trim {
 141        struct p_data p_data;
 142        u32         size;       /* == bio->bi_size */
 143} __packed;
 144
 145struct p_wsame {
 146        struct p_data p_data;
 147        u32           size;     /* == bio->bi_size */
 148} __packed;
 149
 150/*
 151 * commands which share a struct:
 152 *  p_block_ack:
 153 *   P_RECV_ACK (proto B), P_WRITE_ACK (proto C),
 154 *   P_SUPERSEDED (proto C, two-primaries conflict detection)
 155 *  p_block_req:
 156 *   P_DATA_REQUEST, P_RS_DATA_REQUEST
 157 */
 158struct p_block_ack {
 159        u64         sector;
 160        u64         block_id;
 161        u32         blksize;
 162        u32         seq_num;
 163} __packed;
 164
 165struct p_block_req {
 166        u64 sector;
 167        u64 block_id;
 168        u32 blksize;
 169        u32 pad;        /* to multiple of 8 Byte */
 170} __packed;
 171
 172/*
 173 * commands with their own struct for additional fields:
 174 *   P_CONNECTION_FEATURES
 175 *   P_BARRIER
 176 *   P_BARRIER_ACK
 177 *   P_SYNC_PARAM
 178 *   ReportParams
 179 */
 180
 181/* supports TRIM/DISCARD on the "wire" protocol */
 182#define DRBD_FF_TRIM 1
 183
 184/* Detect all-zeros during resync, and rather TRIM/UNMAP/DISCARD those blocks
 185 * instead of fully allocate a supposedly thin volume on initial resync */
 186#define DRBD_FF_THIN_RESYNC 2
 187
 188/* supports REQ_WRITE_SAME on the "wire" protocol.
 189 * Note: this flag is overloaded,
 190 * its presence also
 191 *   - indicates support for 128 MiB "batch bios",
 192 *     max discard size of 128 MiB
 193 *     instead of 4M before that.
 194 *   - indicates that we exchange additional settings in p_sizes
 195 *     drbd_send_sizes()/receive_sizes()
 196 */
 197#define DRBD_FF_WSAME 4
 198
 199struct p_connection_features {
 200        u32 protocol_min;
 201        u32 feature_flags;
 202        u32 protocol_max;
 203
 204        /* should be more than enough for future enhancements
 205         * for now, feature_flags and the reserved array shall be zero.
 206         */
 207
 208        u32 _pad;
 209        u64 reserved[7];
 210} __packed;
 211
 212struct p_barrier {
 213        u32 barrier;    /* barrier number _handle_ only */
 214        u32 pad;        /* to multiple of 8 Byte */
 215} __packed;
 216
 217struct p_barrier_ack {
 218        u32 barrier;
 219        u32 set_size;
 220} __packed;
 221
 222struct p_rs_param {
 223        u32 resync_rate;
 224
 225              /* Since protocol version 88 and higher. */
 226        char verify_alg[0];
 227} __packed;
 228
 229struct p_rs_param_89 {
 230        u32 resync_rate;
 231        /* protocol version 89: */
 232        char verify_alg[SHARED_SECRET_MAX];
 233        char csums_alg[SHARED_SECRET_MAX];
 234} __packed;
 235
 236struct p_rs_param_95 {
 237        u32 resync_rate;
 238        char verify_alg[SHARED_SECRET_MAX];
 239        char csums_alg[SHARED_SECRET_MAX];
 240        u32 c_plan_ahead;
 241        u32 c_delay_target;
 242        u32 c_fill_target;
 243        u32 c_max_rate;
 244} __packed;
 245
 246enum drbd_conn_flags {
 247        CF_DISCARD_MY_DATA = 1,
 248        CF_DRY_RUN = 2,
 249};
 250
 251struct p_protocol {
 252        u32 protocol;
 253        u32 after_sb_0p;
 254        u32 after_sb_1p;
 255        u32 after_sb_2p;
 256        u32 conn_flags;
 257        u32 two_primaries;
 258
 259        /* Since protocol version 87 and higher. */
 260        char integrity_alg[0];
 261
 262} __packed;
 263
 264struct p_uuids {
 265        u64 uuid[UI_EXTENDED_SIZE];
 266} __packed;
 267
 268struct p_rs_uuid {
 269        u64         uuid;
 270} __packed;
 271
 272/* optional queue_limits if (agreed_features & DRBD_FF_WSAME)
 273 * see also struct queue_limits, as of late 2015 */
 274struct o_qlim {
 275        /* we don't need it yet, but we may as well communicate it now */
 276        u32 physical_block_size;
 277
 278        /* so the original in struct queue_limits is unsigned short,
 279         * but I'd have to put in padding anyways. */
 280        u32 logical_block_size;
 281
 282        /* One incoming bio becomes one DRBD request,
 283         * which may be translated to several bio on the receiving side.
 284         * We don't need to communicate chunk/boundary/segment ... limits.
 285         */
 286
 287        /* various IO hints may be useful with "diskless client" setups */
 288        u32 alignment_offset;
 289        u32 io_min;
 290        u32 io_opt;
 291
 292        /* We may need to communicate integrity stuff at some point,
 293         * but let's not get ahead of ourselves. */
 294
 295        /* Backend discard capabilities.
 296         * Receiving side uses "blkdev_issue_discard()", no need to communicate
 297         * more specifics.  If the backend cannot do discards, the DRBD peer
 298         * may fall back to blkdev_issue_zeroout().
 299         */
 300        u8 discard_enabled;
 301        u8 discard_zeroes_data;
 302        u8 write_same_capable;
 303        u8 _pad;
 304} __packed;
 305
 306struct p_sizes {
 307        u64         d_size;  /* size of disk */
 308        u64         u_size;  /* user requested size */
 309        u64         c_size;  /* current exported size */
 310        u32         max_bio_size;  /* Maximal size of a BIO */
 311        u16         queue_order_type;  /* not yet implemented in DRBD*/
 312        u16         dds_flags; /* use enum dds_flags here. */
 313
 314        /* optional queue_limits if (agreed_features & DRBD_FF_WSAME) */
 315        struct o_qlim qlim[0];
 316} __packed;
 317
 318struct p_state {
 319        u32         state;
 320} __packed;
 321
 322struct p_req_state {
 323        u32         mask;
 324        u32         val;
 325} __packed;
 326
 327struct p_req_state_reply {
 328        u32         retcode;
 329} __packed;
 330
 331struct p_drbd06_param {
 332        u64       size;
 333        u32       state;
 334        u32       blksize;
 335        u32       protocol;
 336        u32       version;
 337        u32       gen_cnt[5];
 338        u32       bit_map_gen[5];
 339} __packed;
 340
 341struct p_block_desc {
 342        u64 sector;
 343        u32 blksize;
 344        u32 pad;        /* to multiple of 8 Byte */
 345} __packed;
 346
 347/* Valid values for the encoding field.
 348 * Bump proto version when changing this. */
 349enum drbd_bitmap_code {
 350        /* RLE_VLI_Bytes = 0,
 351         * and other bit variants had been defined during
 352         * algorithm evaluation. */
 353        RLE_VLI_Bits = 2,
 354};
 355
 356struct p_compressed_bm {
 357        /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 358         * (encoding & 0x80): polarity (set/unset) of first runlength
 359         * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
 360         * used to pad up to head.length bytes
 361         */
 362        u8 encoding;
 363
 364        u8 code[0];
 365} __packed;
 366
 367struct p_delay_probe93 {
 368        u32     seq_num; /* sequence number to match the two probe packets */
 369        u32     offset;  /* usecs the probe got sent after the reference time point */
 370} __packed;
 371
 372/*
 373 * Bitmap packets need to fit within a single page on the sender and receiver,
 374 * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger).
 375 */
 376#define DRBD_SOCKET_BUFFER_SIZE 4096
 377
 378#endif  /* __DRBD_PROTOCOL_H */
 379