1/* 2 * GPL HEADER START 3 * 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 only, 8 * as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but 11 * WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * General Public License version 2 for more details (a copy is included 14 * in the LICENSE file that accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License 17 * version 2 along with this program; If not, see 18 * http://www.gnu.org/licenses/gpl-2.0.html 19 * 20 * GPL HEADER END 21 */ 22/* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright (c) 2012 - 2015, Intel Corporation. 27 */ 28/* 29 * This file is part of Lustre, http://www.lustre.org/ 30 * Lustre is a trademark of Seagate, Inc. 31 */ 32 33#ifndef __LNET_TYPES_H__ 34#define __LNET_TYPES_H__ 35 36#include <linux/types.h> 37#include <linux/bvec.h> 38 39/** \addtogroup lnet 40 * @{ 41 */ 42 43#define LNET_VERSION "0.6.0" 44 45/** \addtogroup lnet_addr 46 * @{ 47 */ 48 49/** Portal reserved for LNet's own use. 50 * \see lustre/include/lustre/lustre_idl.h for Lustre portal assignments. 51 */ 52#define LNET_RESERVED_PORTAL 0 53 54/** 55 * Address of an end-point in an LNet network. 56 * 57 * A node can have multiple end-points and hence multiple addresses. 58 * An LNet network can be a simple network (e.g. tcp0) or a network of 59 * LNet networks connected by LNet routers. Therefore an end-point address 60 * has two parts: network ID, and address within a network. 61 * 62 * \see LNET_NIDNET, LNET_NIDADDR, and LNET_MKNID. 63 */ 64typedef __u64 lnet_nid_t; 65/** 66 * ID of a process in a node. Shortened as PID to distinguish from 67 * lnet_process_id, the global process ID. 68 */ 69typedef __u32 lnet_pid_t; 70 71/** wildcard NID that matches any end-point address */ 72#define LNET_NID_ANY ((lnet_nid_t)(-1)) 73/** wildcard PID that matches any lnet_pid_t */ 74#define LNET_PID_ANY ((lnet_pid_t)(-1)) 75 76#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ 77#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ 78#define LNET_PID_LUSTRE 12345 79 80#define LNET_TIME_FOREVER (-1) 81 82/* how an LNET NID encodes net:address */ 83/** extract the address part of an lnet_nid_t */ 84 85static inline __u32 LNET_NIDADDR(lnet_nid_t nid) 86{ 87 return nid & 0xffffffff; 88} 89 90static inline __u32 LNET_NIDNET(lnet_nid_t nid) 91{ 92 return (nid >> 32) & 0xffffffff; 93} 94 95static inline lnet_nid_t LNET_MKNID(__u32 net, __u32 addr) 96{ 97 return (((__u64)net) << 32) | addr; 98} 99 100static inline __u32 LNET_NETNUM(__u32 net) 101{ 102 return net & 0xffff; 103} 104 105static inline __u32 LNET_NETTYP(__u32 net) 106{ 107 return (net >> 16) & 0xffff; 108} 109 110static inline __u32 LNET_MKNET(__u32 type, __u32 num) 111{ 112 return (type << 16) | num; 113} 114 115#define WIRE_ATTR __packed 116 117/* Packed version of lnet_process_id to transfer via network */ 118struct lnet_process_id_packed { 119 /* node id / process id */ 120 lnet_nid_t nid; 121 lnet_pid_t pid; 122} WIRE_ATTR; 123 124/* 125 * The wire handle's interface cookie only matches one network interface in 126 * one epoch (i.e. new cookie when the interface restarts or the node 127 * reboots). The object cookie only matches one object on that interface 128 * during that object's lifetime (i.e. no cookie re-use). 129 */ 130struct lnet_handle_wire { 131 __u64 wh_interface_cookie; 132 __u64 wh_object_cookie; 133} WIRE_ATTR; 134 135enum lnet_msg_type { 136 LNET_MSG_ACK = 0, 137 LNET_MSG_PUT, 138 LNET_MSG_GET, 139 LNET_MSG_REPLY, 140 LNET_MSG_HELLO, 141}; 142 143/* 144 * The variant fields of the portals message header are aligned on an 8 145 * byte boundary in the message header. Note that all types used in these 146 * wire structs MUST be fixed size and the smaller types are placed at the 147 * end. 148 */ 149struct lnet_ack { 150 struct lnet_handle_wire dst_wmd; 151 __u64 match_bits; 152 __u32 mlength; 153} WIRE_ATTR; 154 155struct lnet_put { 156 struct lnet_handle_wire ack_wmd; 157 __u64 match_bits; 158 __u64 hdr_data; 159 __u32 ptl_index; 160 __u32 offset; 161} WIRE_ATTR; 162 163struct lnet_get { 164 struct lnet_handle_wire return_wmd; 165 __u64 match_bits; 166 __u32 ptl_index; 167 __u32 src_offset; 168 __u32 sink_length; 169} WIRE_ATTR; 170 171struct lnet_reply { 172 struct lnet_handle_wire dst_wmd; 173} WIRE_ATTR; 174 175struct lnet_hello { 176 __u64 incarnation; 177 __u32 type; 178} WIRE_ATTR; 179 180struct lnet_hdr { 181 lnet_nid_t dest_nid; 182 lnet_nid_t src_nid; 183 lnet_pid_t dest_pid; 184 lnet_pid_t src_pid; 185 __u32 type; /* enum lnet_msg_type */ 186 __u32 payload_length; /* payload data to follow */ 187 /*<------__u64 aligned------->*/ 188 union { 189 struct lnet_ack ack; 190 struct lnet_put put; 191 struct lnet_get get; 192 struct lnet_reply reply; 193 struct lnet_hello hello; 194 } msg; 195} WIRE_ATTR; 196 197/* 198 * A HELLO message contains a magic number and protocol version 199 * code in the header's dest_nid, the peer's NID in the src_nid, and 200 * LNET_MSG_HELLO in the type field. All other common fields are zero 201 * (including payload_size; i.e. no payload). 202 * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is 203 * running the same protocol and to find out its NID. These LNDs should 204 * exchange HELLO messages when a connection is first established. Individual 205 * LNDs can put whatever else they fancy in struct lnet_hdr::msg. 206 */ 207struct lnet_magicversion { 208 __u32 magic; /* LNET_PROTO_TCP_MAGIC */ 209 __u16 version_major; /* increment on incompatible change */ 210 __u16 version_minor; /* increment on compatible change */ 211} WIRE_ATTR; 212 213/* PROTO MAGIC for LNDs */ 214#define LNET_PROTO_IB_MAGIC 0x0be91b91 215#define LNET_PROTO_GNI_MAGIC 0xb00fbabe /* ask Kim */ 216#define LNET_PROTO_TCP_MAGIC 0xeebc0ded 217#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 218#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ 219 220/* Placeholder for a future "unified" protocol across all LNDs */ 221/* 222 * Current LNDs that receive a request with this magic will respond with a 223 * "stub" reply using their current protocol 224 */ 225#define LNET_PROTO_MAGIC 0x45726963 /* ! */ 226 227#define LNET_PROTO_TCP_VERSION_MAJOR 1 228#define LNET_PROTO_TCP_VERSION_MINOR 0 229 230/* Acceptor connection request */ 231struct lnet_acceptor_connreq { 232 __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ 233 __u32 acr_version; /* protocol version */ 234 __u64 acr_nid; /* target NID */ 235} WIRE_ATTR; 236 237#define LNET_PROTO_ACCEPTOR_VERSION 1 238 239struct lnet_ni_status { 240 lnet_nid_t ns_nid; 241 __u32 ns_status; 242 __u32 ns_unused; 243} WIRE_ATTR; 244 245struct lnet_ping_info { 246 __u32 pi_magic; 247 __u32 pi_features; 248 lnet_pid_t pi_pid; 249 __u32 pi_nnis; 250 struct lnet_ni_status pi_ni[0]; 251} WIRE_ATTR; 252 253struct lnet_counters { 254 __u32 msgs_alloc; 255 __u32 msgs_max; 256 __u32 errors; 257 __u32 send_count; 258 __u32 recv_count; 259 __u32 route_count; 260 __u32 drop_count; 261 __u64 send_length; 262 __u64 recv_length; 263 __u64 route_length; 264 __u64 drop_length; 265} WIRE_ATTR; 266 267#define LNET_NI_STATUS_UP 0x15aac0de 268#define LNET_NI_STATUS_DOWN 0xdeadface 269#define LNET_NI_STATUS_INVALID 0x00000000 270 271#define LNET_MAX_INTERFACES 16 272 273/** 274 * Objects maintained by the LNet are accessed through handles. Handle types 275 * have names of the form lnet_handle_xx, where xx is one of the two letter 276 * object type codes ('eq' for event queue, 'md' for memory descriptor, and 277 * 'me' for match entry). Each type of object is given a unique handle type 278 * to enhance type checking. 279 */ 280#define LNET_WIRE_HANDLE_COOKIE_NONE (-1) 281 282struct lnet_handle_eq { 283 u64 cookie; 284}; 285 286/** 287 * Invalidate eq handle @h. 288 */ 289static inline void LNetInvalidateEQHandle(struct lnet_handle_eq *h) 290{ 291 h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; 292} 293 294/** 295 * Check whether eq handle @h is invalid. 296 * 297 * @return 1 if handle is invalid, 0 if valid. 298 */ 299static inline int LNetEQHandleIsInvalid(struct lnet_handle_eq h) 300{ 301 return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); 302} 303 304struct lnet_handle_md { 305 u64 cookie; 306}; 307 308/** 309 * Invalidate md handle @h. 310 */ 311static inline void LNetInvalidateMDHandle(struct lnet_handle_md *h) 312{ 313 h->cookie = LNET_WIRE_HANDLE_COOKIE_NONE; 314} 315 316/** 317 * Check whether eq handle @h is invalid. 318 * 319 * @return 1 if handle is invalid, 0 if valid. 320 */ 321static inline int LNetMDHandleIsInvalid(struct lnet_handle_md h) 322{ 323 return (LNET_WIRE_HANDLE_COOKIE_NONE == h.cookie); 324} 325 326struct lnet_handle_me { 327 u64 cookie; 328}; 329 330/** 331 * Global process ID. 332 */ 333struct lnet_process_id { 334 /** node id */ 335 lnet_nid_t nid; 336 /** process id */ 337 lnet_pid_t pid; 338}; 339/** @} lnet_addr */ 340 341/** \addtogroup lnet_me 342 * @{ 343 */ 344 345/** 346 * Specifies whether the match entry or memory descriptor should be unlinked 347 * automatically (LNET_UNLINK) or not (LNET_RETAIN). 348 */ 349enum lnet_unlink { 350 LNET_RETAIN = 0, 351 LNET_UNLINK 352}; 353 354/** 355 * Values of the type lnet_ins_pos are used to control where a new match 356 * entry is inserted. The value LNET_INS_BEFORE is used to insert the new 357 * entry before the current entry or before the head of the list. The value 358 * LNET_INS_AFTER is used to insert the new entry after the current entry 359 * or after the last item in the list. 360 */ 361enum lnet_ins_pos { 362 /** insert ME before current position or head of the list */ 363 LNET_INS_BEFORE, 364 /** insert ME after current position or tail of the list */ 365 LNET_INS_AFTER, 366 /** attach ME at tail of local CPU partition ME list */ 367 LNET_INS_LOCAL 368}; 369 370/** @} lnet_me */ 371 372/** \addtogroup lnet_md 373 * @{ 374 */ 375 376/** 377 * Defines the visible parts of a memory descriptor. Values of this type 378 * are used to initialize memory descriptors. 379 */ 380struct lnet_md { 381 /** 382 * Specify the memory region associated with the memory descriptor. 383 * If the options field has: 384 * - LNET_MD_KIOV bit set: The start field points to the starting 385 * address of an array of struct bio_vec and the length field specifies 386 * the number of entries in the array. The length can't be bigger 387 * than LNET_MAX_IOV. The struct bio_vec is used to describe page-based 388 * fragments that are not necessarily mapped in virtual memory. 389 * - LNET_MD_IOVEC bit set: The start field points to the starting 390 * address of an array of struct iovec and the length field specifies 391 * the number of entries in the array. The length can't be bigger 392 * than LNET_MAX_IOV. The struct iovec is used to describe fragments 393 * that have virtual addresses. 394 * - Otherwise: The memory region is contiguous. The start field 395 * specifies the starting address for the memory region and the 396 * length field specifies its length. 397 * 398 * When the memory region is fragmented, all fragments but the first 399 * one must start on page boundary, and all but the last must end on 400 * page boundary. 401 */ 402 void *start; 403 unsigned int length; 404 /** 405 * Specifies the maximum number of operations that can be performed 406 * on the memory descriptor. An operation is any action that could 407 * possibly generate an event. In the usual case, the threshold value 408 * is decremented for each operation on the MD. When the threshold 409 * drops to zero, the MD becomes inactive and does not respond to 410 * operations. A threshold value of LNET_MD_THRESH_INF indicates that 411 * there is no bound on the number of operations that may be applied 412 * to a MD. 413 */ 414 int threshold; 415 /** 416 * Specifies the largest incoming request that the memory descriptor 417 * should respond to. When the unused portion of a MD (length - 418 * local offset) falls below this value, the MD becomes inactive and 419 * does not respond to further operations. This value is only used 420 * if the LNET_MD_MAX_SIZE option is set. 421 */ 422 int max_size; 423 /** 424 * Specifies the behavior of the memory descriptor. A bitwise OR 425 * of the following values can be used: 426 * - LNET_MD_OP_PUT: The LNet PUT operation is allowed on this MD. 427 * - LNET_MD_OP_GET: The LNet GET operation is allowed on this MD. 428 * - LNET_MD_MANAGE_REMOTE: The offset used in accessing the memory 429 * region is provided by the incoming request. By default, the 430 * offset is maintained locally. When maintained locally, the 431 * offset is incremented by the length of the request so that 432 * the next operation (PUT or GET) will access the next part of 433 * the memory region. Note that only one offset variable exists 434 * per memory descriptor. If both PUT and GET operations are 435 * performed on a memory descriptor, the offset is updated each time. 436 * - LNET_MD_TRUNCATE: The length provided in the incoming request can 437 * be reduced to match the memory available in the region (determined 438 * by subtracting the offset from the length of the memory region). 439 * By default, if the length in the incoming operation is greater 440 * than the amount of memory available, the operation is rejected. 441 * - LNET_MD_ACK_DISABLE: An acknowledgment should not be sent for 442 * incoming PUT operations, even if requested. By default, 443 * acknowledgments are sent for PUT operations that request an 444 * acknowledgment. Acknowledgments are never sent for GET operations. 445 * The data sent in the REPLY serves as an implicit acknowledgment. 446 * - LNET_MD_KIOV: The start and length fields specify an array of 447 * struct bio_vec. 448 * - LNET_MD_IOVEC: The start and length fields specify an array of 449 * struct iovec. 450 * - LNET_MD_MAX_SIZE: The max_size field is valid. 451 * 452 * Note: 453 * - LNET_MD_KIOV or LNET_MD_IOVEC allows for a scatter/gather 454 * capability for memory descriptors. They can't be both set. 455 * - When LNET_MD_MAX_SIZE is set, the total length of the memory 456 * region (i.e. sum of all fragment lengths) must not be less than 457 * \a max_size. 458 */ 459 unsigned int options; 460 /** 461 * A user-specified value that is associated with the memory 462 * descriptor. The value does not need to be a pointer, but must fit 463 * in the space used by a pointer. This value is recorded in events 464 * associated with operations on this MD. 465 */ 466 void *user_ptr; 467 /** 468 * A handle for the event queue used to log the operations performed on 469 * the memory region. If this argument is a NULL handle (i.e. nullified 470 * by LNetInvalidateHandle()), operations performed on this memory 471 * descriptor are not logged. 472 */ 473 struct lnet_handle_eq eq_handle; 474}; 475 476/* 477 * Max Transfer Unit (minimum supported everywhere). 478 * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) 479 * these limits are system wide and not interface-local. 480 */ 481#define LNET_MTU_BITS 20 482#define LNET_MTU (1 << LNET_MTU_BITS) 483 484/** limit on the number of fragments in discontiguous MDs */ 485#define LNET_MAX_IOV 256 486 487/** 488 * Options for the MD structure. See lnet_md::options. 489 */ 490#define LNET_MD_OP_PUT (1 << 0) 491/** See lnet_md::options. */ 492#define LNET_MD_OP_GET (1 << 1) 493/** See lnet_md::options. */ 494#define LNET_MD_MANAGE_REMOTE (1 << 2) 495/* unused (1 << 3) */ 496/** See lnet_md::options. */ 497#define LNET_MD_TRUNCATE (1 << 4) 498/** See lnet_md::options. */ 499#define LNET_MD_ACK_DISABLE (1 << 5) 500/** See lnet_md::options. */ 501#define LNET_MD_IOVEC (1 << 6) 502/** See lnet_md::options. */ 503#define LNET_MD_MAX_SIZE (1 << 7) 504/** See lnet_md::options. */ 505#define LNET_MD_KIOV (1 << 8) 506 507/* For compatibility with Cray Portals */ 508#define LNET_MD_PHYS 0 509 510/** Infinite threshold on MD operations. See lnet_md::threshold */ 511#define LNET_MD_THRESH_INF (-1) 512 513/** @} lnet_md */ 514 515/** \addtogroup lnet_eq 516 * @{ 517 */ 518 519/** 520 * Six types of events can be logged in an event queue. 521 */ 522enum lnet_event_kind { 523 /** An incoming GET operation has completed on the MD. */ 524 LNET_EVENT_GET = 1, 525 /** 526 * An incoming PUT operation has completed on the MD. The 527 * underlying layers will not alter the memory (on behalf of this 528 * operation) once this event has been logged. 529 */ 530 LNET_EVENT_PUT, 531 /** 532 * A REPLY operation has completed. This event is logged after the 533 * data (if any) from the REPLY has been written into the MD. 534 */ 535 LNET_EVENT_REPLY, 536 /** An acknowledgment has been received. */ 537 LNET_EVENT_ACK, 538 /** 539 * An outgoing send (PUT or GET) operation has completed. This event 540 * is logged after the entire buffer has been sent and it is safe for 541 * the caller to reuse the buffer. 542 * 543 * Note: 544 * - The LNET_EVENT_SEND doesn't guarantee message delivery. It can 545 * happen even when the message has not yet been put out on wire. 546 * - It's unsafe to assume that in an outgoing GET operation 547 * the LNET_EVENT_SEND event would happen before the 548 * LNET_EVENT_REPLY event. The same holds for LNET_EVENT_SEND and 549 * LNET_EVENT_ACK events in an outgoing PUT operation. 550 */ 551 LNET_EVENT_SEND, 552 /** 553 * A MD has been unlinked. Note that LNetMDUnlink() does not 554 * necessarily trigger an LNET_EVENT_UNLINK event. 555 * \see LNetMDUnlink 556 */ 557 LNET_EVENT_UNLINK, 558}; 559 560#define LNET_SEQ_GT(a, b) (((signed long)((a) - (b))) > 0) 561 562/** 563 * Information about an event on a MD. 564 */ 565struct lnet_event { 566 /** The identifier (nid, pid) of the target. */ 567 struct lnet_process_id target; 568 /** The identifier (nid, pid) of the initiator. */ 569 struct lnet_process_id initiator; 570 /** 571 * The NID of the immediate sender. If the request has been forwarded 572 * by routers, this is the NID of the last hop; otherwise it's the 573 * same as the initiator. 574 */ 575 lnet_nid_t sender; 576 /** Indicates the type of the event. */ 577 enum lnet_event_kind type; 578 /** The portal table index specified in the request */ 579 unsigned int pt_index; 580 /** A copy of the match bits specified in the request. */ 581 __u64 match_bits; 582 /** The length (in bytes) specified in the request. */ 583 unsigned int rlength; 584 /** 585 * The length (in bytes) of the data that was manipulated by the 586 * operation. For truncated operations, the manipulated length will be 587 * the number of bytes specified by the MD (possibly with an offset, 588 * see lnet_md). For all other operations, the manipulated length 589 * will be the length of the requested operation, i.e. rlength. 590 */ 591 unsigned int mlength; 592 /** 593 * The handle to the MD associated with the event. The handle may be 594 * invalid if the MD has been unlinked. 595 */ 596 struct lnet_handle_md md_handle; 597 /** 598 * A snapshot of the state of the MD immediately after the event has 599 * been processed. In particular, the threshold field in md will 600 * reflect the value of the threshold after the operation occurred. 601 */ 602 struct lnet_md md; 603 /** 604 * 64 bits of out-of-band user data. Only valid for LNET_EVENT_PUT. 605 * \see LNetPut 606 */ 607 __u64 hdr_data; 608 /** 609 * Indicates the completion status of the operation. It's 0 for 610 * successful operations, otherwise it's an error code. 611 */ 612 int status; 613 /** 614 * Indicates whether the MD has been unlinked. Note that: 615 * - An event with unlinked set is the last event on the MD. 616 * - This field is also set for an explicit LNET_EVENT_UNLINK event. 617 * \see LNetMDUnlink 618 */ 619 int unlinked; 620 /** 621 * The displacement (in bytes) into the memory region that the 622 * operation used. The offset can be determined by the operation for 623 * a remote managed MD or by the local MD. 624 * \see lnet_md::options 625 */ 626 unsigned int offset; 627 /** 628 * The sequence number for this event. Sequence numbers are unique 629 * to each event. 630 */ 631 volatile unsigned long sequence; 632}; 633 634/** 635 * Event queue handler function type. 636 * 637 * The EQ handler runs for each event that is deposited into the EQ. The 638 * handler is supplied with a pointer to the event that triggered the 639 * handler invocation. 640 * 641 * The handler must not block, must be reentrant, and must not call any LNet 642 * API functions. It should return as quickly as possible. 643 */ 644typedef void (*lnet_eq_handler_t)(struct lnet_event *event); 645#define LNET_EQ_HANDLER_NONE NULL 646/** @} lnet_eq */ 647 648/** \addtogroup lnet_data 649 * @{ 650 */ 651 652/** 653 * Specify whether an acknowledgment should be sent by target when the PUT 654 * operation completes (i.e., when the data has been written to a MD of the 655 * target process). 656 * 657 * \see lnet_md::options for the discussion on LNET_MD_ACK_DISABLE by which 658 * acknowledgments can be disabled for a MD. 659 */ 660enum lnet_ack_req { 661 /** Request an acknowledgment */ 662 LNET_ACK_REQ, 663 /** Request that no acknowledgment should be generated. */ 664 LNET_NOACK_REQ 665}; 666/** @} lnet_data */ 667 668/** @} lnet */ 669#endif 670