/*
 * Copyright (c) 2008 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#ifndef _LINUX_RDS_H
#define _LINUX_RDS_H

#include <linux/types.h>
#include <linux/socket.h>		/* For __kernel_sockaddr_storage. */

#define RDS_IB_ABI_VERSION		0x301

#define SOL_RDS				276

/*
 * setsockopt/getsockopt for SOL_RDS
 */
#define RDS_CANCEL_SENT_TO		1
#define RDS_GET_MR			2
#define RDS_FREE_MR			3
/* deprecated: RDS_BARRIER 4 */
#define RDS_RECVERR			5
#define RDS_CONG_MONITOR		6
#define RDS_GET_MR_FOR_DEST		7
#define SO_RDS_TRANSPORT		8

/* Socket option to tap receive path latency
 *	SO_RDS: SO_RDS_MSG_RXPATH_LATENCY
 *	Format used struct rds_rx_trace_so
 */
#define SO_RDS_MSG_RXPATH_LATENCY	10


/* supported values for SO_RDS_TRANSPORT */
#define RDS_TRANS_IB			0
#define RDS_TRANS_IWARP			1
#define RDS_TRANS_TCP			2
#define RDS_TRANS_COUNT			3
#define RDS_TRANS_NONE			(~0)

/*
 * Control message types for SOL_RDS.
 *
 * RDS_CMSG_RDMA_ARGS (sendmsg)
 *	Request a RDMA transfer to/from the specified
 *	memory ranges.
 *	The cmsg_data is a struct rds_rdma_args.
 * RDS_CMSG_RDMA_DEST (recvmsg, sendmsg)
 *	Kernel informs application about intended
 *	source/destination of a RDMA transfer
 * RDS_CMSG_RDMA_MAP (sendmsg)
 *	Application asks kernel to map the given
 *	memory range into a IB MR, and send the
 *	R_Key along in an RDS extension header.
 *	The cmsg_data is a struct rds_get_mr_args,
 *	the same as for the GET_MR setsockopt.
 * RDS_CMSG_RDMA_STATUS (recvmsg)
 *	Returns the status of a completed RDMA operation.
 * RDS_CMSG_RXPATH_LATENCY (recvmsg)
 *	Returns rds message latencies in various stages of receive
 *	path in ns. It is set per socket using the
 *	SO_RDS_MSG_RXPATH_LATENCY socket option. Legitimate points are
 *	defined in enum rds_message_rxpath_latency. More points can be
 *	added in future. CMSG format is struct rds_cmsg_rx_trace.
 */
#define RDS_CMSG_RDMA_ARGS		1
#define RDS_CMSG_RDMA_DEST		2
#define RDS_CMSG_RDMA_MAP		3
#define RDS_CMSG_RDMA_STATUS		4
#define RDS_CMSG_CONG_UPDATE		5
#define RDS_CMSG_ATOMIC_FADD		6
#define RDS_CMSG_ATOMIC_CSWP		7
#define RDS_CMSG_MASKED_ATOMIC_FADD	8
#define RDS_CMSG_MASKED_ATOMIC_CSWP	9
#define RDS_CMSG_RXPATH_LATENCY		11

/* Info identifiers; RDS_INFO_FIRST..RDS_INFO_LAST bound the valid range. */
#define RDS_INFO_FIRST			10000
#define RDS_INFO_COUNTERS		10000
#define RDS_INFO_CONNECTIONS		10001
/* 10002 aka RDS_INFO_FLOWS is deprecated */
#define RDS_INFO_SEND_MESSAGES		10003
#define RDS_INFO_RETRANS_MESSAGES	10004
#define RDS_INFO_RECV_MESSAGES		10005
#define RDS_INFO_SOCKETS		10006
#define RDS_INFO_TCP_SOCKETS		10007
#define RDS_INFO_IB_CONNECTIONS		10008
#define RDS_INFO_CONNECTION_STATS	10009
#define RDS_INFO_IWARP_CONNECTIONS	10010
#define RDS_INFO_LAST			10010

/* A named 64-bit counter. Packed: no padding between fields. */
struct rds_info_counter {
	__u8	name[32];
	__u64	value;
} __attribute__((packed));

/* Bit values for rds_info_connection.flags. */
#define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
#define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
#define RDS_INFO_CONNECTION_FLAG_CONNECTED	0x04

#define TRANSNAMSIZ	16

/* Per-connection record. Packed: no padding between fields. */
struct rds_info_connection {
	__u64		next_tx_seq;
	__u64		next_rx_seq;
	__be32		laddr;
	__be32		faddr;
	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
	__u8		flags;
} __attribute__((packed));

/* Bit values for rds_info_message.flags. */
#define RDS_INFO_MESSAGE_FLAG_ACK		0x01
#define RDS_INFO_MESSAGE_FLAG_FAST_ACK		0x02

/* Per-message record. Packed: no padding between fields. */
struct rds_info_message {
	__u64		seq;
	__u32		len;
	__be32		laddr;
	__be32		faddr;
	__be16		lport;
	__be16		fport;
	__u8		flags;
} __attribute__((packed));

/* Per-socket record. Packed: no padding between fields. */
struct rds_info_socket {
	__u32		sndbuf;
	__be32		bound_addr;
	__be32		connected_addr;
	__be16		bound_port;
	__be16		connected_port;
	__u32		rcvbuf;
	__u64		inum;
} __attribute__((packed));

/* Per-TCP-socket record. Packed: no padding between fields. */
struct rds_info_tcp_socket {
	__be32		local_addr;
	__be16		local_port;
	__be32		peer_addr;
	__be16		peer_port;
	__u64		hdr_rem;
	__u64		data_rem;
	__u32		last_sent_nxt;
	__u32		last_expected_una;
	__u32		last_seen_una;
} __attribute__((packed));

#define RDS_IB_GID_LEN	16
/* RDMA connection record. Unlike the records above, this one is not packed. */
struct rds_info_rdma_connection {
	__be32		src_addr;
	__be32		dst_addr;
	__u8		src_gid[RDS_IB_GID_LEN];
	__u8		dst_gid[RDS_IB_GID_LEN];

	__u32		max_send_wr;
	__u32		max_recv_wr;
	__u32		max_send_sge;
	__u32		rdma_mr_max;
	__u32		rdma_mr_size;
};

/* RDS message Receive Path Latency points */
enum rds_message_rxpath_latency {
	RDS_MSG_RX_HDR_TO_DGRAM_START = 0,
	RDS_MSG_RX_DGRAM_REASSEMBLE,
	RDS_MSG_RX_DGRAM_DELIVERED,
	RDS_MSG_RX_DGRAM_TRACE_MAX	/* number of points; sizes the arrays below */
};

/* Argument format for the SO_RDS_MSG_RXPATH_LATENCY socket option. */
struct rds_rx_trace_so {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/* Payload format of an RDS_CMSG_RXPATH_LATENCY control message. */
struct rds_cmsg_rx_trace {
	__u8 rx_traces;
	__u8 rx_trace_pos[RDS_MSG_RX_DGRAM_TRACE_MAX];
	__u64 rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
};

/*
 * Congestion monitoring.
 * Congestion control in RDS happens at the host connection
 * level by exchanging a bitmap marking congested ports.
 * By default, a process sleeping in poll() is always woken
 * up when the congestion map is updated.
 * With explicit monitoring, an application can have more
 * fine-grained control.
 * The application installs a 64bit mask value in the socket,
 * where each bit corresponds to a group of ports.
 * When a congestion update arrives, RDS checks the set of
 * ports that are now uncongested against the list bit mask
 * installed in the socket, and if they overlap, we queue a
 * cong_notification on the socket.
 *
 * To install the congestion monitor bitmask, use RDS_CONG_MONITOR
 * with the 64bit mask.
 * Congestion updates are received via RDS_CMSG_CONG_UPDATE
 * control messages.
 *
 * The correspondence between bits and ports is
 *	1 << (portnum % 64)
 */
#define RDS_CONG_MONITOR_SIZE	64
/*
 * The macro parameter is parenthesized so that the cast applies to the
 * whole argument expression (e.g. "a - b"), not just its first operand.
 * The result is therefore always in 0..RDS_CONG_MONITOR_SIZE-1, which
 * keeps the shift in RDS_CONG_MONITOR_MASK() well defined.
 */
#define RDS_CONG_MONITOR_BIT(port)  (((unsigned int)(port)) % RDS_CONG_MONITOR_SIZE)
#define RDS_CONG_MONITOR_MASK(port) (1ULL << RDS_CONG_MONITOR_BIT(port))

/*
 * RDMA related types
 */

/*
 * This encapsulates a remote memory location.
 * In the current implementation, it contains the R_Key
 * of the remote memory region, and the offset into it
 * (so that the application does not have to worry about
 * alignment).
 */
typedef __u64		rds_rdma_cookie_t;

/* A memory range: start address and length in bytes. */
struct rds_iovec {
	__u64		addr;
	__u64		bytes;
};

/* Argument for RDS_GET_MR and cmsg_data for RDS_CMSG_RDMA_MAP. */
struct rds_get_mr_args {
	struct rds_iovec vec;
	__u64		cookie_addr;
	__u64		flags;
};

/* Same fields as rds_get_mr_args, preceded by a destination address. */
struct rds_get_mr_for_dest_args {
	struct __kernel_sockaddr_storage dest_addr;
	struct rds_iovec	vec;
	__u64			cookie_addr;
	__u64			flags;
};

struct rds_free_mr_args {
	rds_rdma_cookie_t cookie;
	__u64		flags;
};

/* cmsg_data for RDS_CMSG_RDMA_ARGS. */
struct rds_rdma_args {
	rds_rdma_cookie_t cookie;
	struct rds_iovec remote_vec;
	__u64		local_vec_addr;
	__u64		nr_local;
	__u64		flags;
	__u64		user_token;
};

struct rds_atomic_args {
	rds_rdma_cookie_t cookie;
	__u64		local_addr;
	__u64		remote_addr;
	/*
	 * Operation-specific operands. NOTE(review): which member applies
	 * presumably follows the RDS_CMSG_[MASKED_]ATOMIC_* message type --
	 * confirm against the kernel implementation.
	 */
	union {
		struct {
			__u64		compare;
			__u64		swap;
		} cswp;
		struct {
			__u64		add;
		} fadd;
		struct {
			__u64		compare;
			__u64		swap;
			__u64		compare_mask;
			__u64		swap_mask;
		} m_cswp;
		struct {
			__u64		add;
			__u64		nocarry_mask;
		} m_fadd;
	};
	__u64		flags;
	__u64		user_token;
};

/* Completion notification; status holds one of the RDS_RDMA_* codes below. */
struct rds_rdma_notify {
	__u64		user_token;
	__s32		status;
};

#define RDS_RDMA_SUCCESS	0
#define RDS_RDMA_REMOTE_ERROR	1
#define RDS_RDMA_CANCELED	2
#define RDS_RDMA_DROPPED	3
#define RDS_RDMA_OTHER_ERROR	4

/*
 * Common set of flags for all RDMA related structs
 */
#define RDS_RDMA_READWRITE	0x0001
#define RDS_RDMA_FENCE		0x0002	/* use FENCE for immediate send */
#define RDS_RDMA_INVALIDATE	0x0004	/* invalidate R_Key after freeing MR */
#define RDS_RDMA_USE_ONCE	0x0008	/* free MR after use */
#define RDS_RDMA_DONTWAIT	0x0010	/* Don't wait in SET_BARRIER */
#define RDS_RDMA_NOTIFY_ME	0x0020	/* Notify when operation completes */
#define RDS_RDMA_SILENT		0x0040	/* Do not interrupt remote */

#endif /* _LINUX_RDS_H */