1/****************************************************************************** 2 * xen_netif.h 3 * 4 * Unified network-device I/O interface for Xen guest OSes. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Copyright (c) 2003-2004, Keir Fraser 25 */ 26 27#ifndef __XEN_PUBLIC_IO_XEN_NETIF_H__ 28#define __XEN_PUBLIC_IO_XEN_NETIF_H__ 29 30#include "ring.h" 31#include "../grant_table.h" 32 33/* 34 * Older implementation of Xen network frontend / backend has an 35 * implicit dependency on the MAX_SKB_FRAGS as the maximum number of 36 * ring slots a skb can use. Netfront / netback may not work as 37 * expected when frontend and backend have different MAX_SKB_FRAGS. 38 * 39 * A better approach is to add mechanism for netfront / netback to 40 * negotiate this value. However we cannot fix all possible 41 * frontends, so we need to define a value which states the minimum 42 * slots backend must support. 
43 * 44 * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS 45 * (18), which is proved to work with most frontends. Any new backend 46 * which doesn't negotiate with frontend should expect frontend to 47 * send a valid packet using slots up to this value. 48 */ 49#define XEN_NETIF_NR_SLOTS_MIN 18 50 51/* 52 * Notifications after enqueuing any type of message should be conditional on 53 * the appropriate req_event or rsp_event field in the shared ring. 54 * If the client sends notification for rx requests then it should specify 55 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume 56 * that it cannot safely queue packets (as it may not be kicked to send them). 57 */ 58 59/* 60 * "feature-split-event-channels" is introduced to separate guest TX 61 * and RX notification. Backend either doesn't support this feature or 62 * advertises it via xenstore as 0 (disabled) or 1 (enabled). 63 * 64 * To make use of this feature, frontend should allocate two event 65 * channels for TX and RX, advertise them to backend as 66 * "event-channel-tx" and "event-channel-rx" respectively. If frontend 67 * doesn't want to use this feature, it just writes "event-channel" 68 * node as before. 69 */ 70 71/* 72 * Multiple transmit and receive queues: 73 * If supported, the backend will write the key "multi-queue-max-queues" to 74 * the directory for that vif, and set its value to the maximum supported 75 * number of queues. 76 * Frontends that are aware of this feature and wish to use it can write the 77 * key "multi-queue-num-queues", set to the number they wish to use, which 78 * must be greater than zero, and no more than the value reported by the backend 79 * in "multi-queue-max-queues". 80 * 81 * Queues replicate the shared rings and event channels. 82 * "feature-split-event-channels" may optionally be used when using 83 * multiple queues, but is not mandatory. 84 * 85 * Each queue consists of one shared ring pair, i.e. 
there must be the same 86 * number of tx and rx rings. 87 * 88 * For frontends requesting just one queue, the usual event-channel and 89 * ring-ref keys are written as before, simplifying the backend processing 90 * to avoid distinguishing between a frontend that doesn't understand the 91 * multi-queue feature, and one that does, but requested only one queue. 92 * 93 * Frontends requesting two or more queues must not write the toplevel 94 * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, 95 * instead writing those keys under sub-keys having the name "queue-N" where 96 * N is the integer ID of the queue for which those keys belong. Queues 97 * are indexed from zero. For example, a frontend with two queues and split 98 * event channels must write the following set of queue-related keys: 99 * 100 * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" 101 * /local/domain/1/device/vif/0/queue-0 = "" 102 * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" 103 * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" 104 * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" 105 * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" 106 * /local/domain/1/device/vif/0/queue-1 = "" 107 * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" 108 * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1" 109 * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>" 110 * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>" 111 * 112 * If there is any inconsistency in the XenStore data, the backend may 113 * choose not to connect any queues, instead treating the request as an 114 * error. This includes scenarios where more (or fewer) queues were 115 * requested than the frontend provided details for. 
116 * 117 * Mapping of packets to queues is considered to be a function of the 118 * transmitting system (backend or frontend) and is not negotiated 119 * between the two. Guests are free to transmit packets on any queue 120 * they choose, provided it has been set up correctly. Guests must be 121 * prepared to receive packets on any queue they have requested be set up. 122 */ 123 124/* 125 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum 126 * offload off or on. If it is missing then the feature is assumed to be on. 127 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum 128 * offload on or off. If it is missing then the feature is assumed to be off. 129 */ 130 131/* 132 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to 133 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither 134 * frontends nor backends are assumed to be capable unless the flags are 135 * present. 136 */ 137 138/* 139 * "feature-multicast-control" and "feature-dynamic-multicast-control" 140 * advertise the capability to filter ethernet multicast packets in the 141 * backend. If the frontend wishes to take advantage of this feature then 142 * it may set "request-multicast-control". If the backend only advertises 143 * "feature-multicast-control" then "request-multicast-control" must be set 144 * before the frontend moves into the connected state. The backend will 145 * sample the value on this state transition and any subsequent change in 146 * value will have no effect. However, if the backend also advertises 147 * "feature-dynamic-multicast-control" then "request-multicast-control" 148 * may be set by the frontend at any time. In this case, the backend will 149 * watch the value and re-sample on watch events. 
150 * 151 * If the sampled value of "request-multicast-control" is set then the 152 * backend transmit side should no longer flood multicast packets to the 153 * frontend, it should instead drop any multicast packet that does not 154 * match in a filter list. 155 * The list is amended by the frontend by sending dummy transmit requests 156 * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as 157 * specified below. 158 * Note that the filter list may be amended even if the sampled value of 159 * "request-multicast-control" is not set, however the filter should only 160 * be applied if it is set. 161 */ 162 163/* 164 * Control ring 165 * ============ 166 * 167 * Some features, such as hashing (detailed below), require a 168 * significant amount of out-of-band data to be passed from frontend to 169 * backend. Use of xenstore is not suitable for large quantities of data 170 * because of quota limitations and so a dedicated 'control ring' is used. 171 * The ability of the backend to use a control ring is advertised by 172 * setting: 173 * 174 * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1" 175 * 176 * The frontend provides a control ring to the backend by setting: 177 * 178 * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref> 179 * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port> 180 * 181 * where <gref> is the grant reference of the shared page used to 182 * implement the control ring and <port> is an event channel to be used 183 * as a mailbox interrupt. These keys must be set before the frontend 184 * moves into the connected state. 185 * 186 * The control ring uses a fixed request/response message size and is 187 * balanced (i.e. one request to one response), so operationally it is much 188 * the same as a transmit or receive ring. 189 * Note that there is no requirement that responses are issued in the same 190 * order as requests. 
191 */ 192 193/* 194 * Hash types 195 * ========== 196 * 197 * For the purposes of the definitions below, 'Packet[]' is an array of 198 * octets containing an IP packet without options, 'Array[X..Y]' means a 199 * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is 200 * used to indicate concatenation of arrays. 201 */ 202 203/* 204 * A hash calculated over an IP version 4 header as follows: 205 * 206 * Buffer[0..8] = Packet[12..15] (source address) + 207 * Packet[16..19] (destination address) 208 * 209 * Result = Hash(Buffer, 8) 210 */ 211#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 212#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ 213 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) 214 215/* 216 * A hash calculated over an IP version 4 header and TCP header as 217 * follows: 218 * 219 * Buffer[0..12] = Packet[12..15] (source address) + 220 * Packet[16..19] (destination address) + 221 * Packet[20..21] (source port) + 222 * Packet[22..23] (destination port) 223 * 224 * Result = Hash(Buffer, 12) 225 */ 226#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 227#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ 228 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) 229 230/* 231 * A hash calculated over an IP version 6 header as follows: 232 * 233 * Buffer[0..32] = Packet[8..23] (source address ) + 234 * Packet[24..39] (destination address) 235 * 236 * Result = Hash(Buffer, 32) 237 */ 238#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 239#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ 240 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) 241 242/* 243 * A hash calculated over an IP version 6 header and TCP header as 244 * follows: 245 * 246 * Buffer[0..36] = Packet[8..23] (source address) + 247 * Packet[24..39] (destination address) + 248 * Packet[40..41] (source port) + 249 * Packet[42..43] (destination port) 250 * 251 * Result = Hash(Buffer, 36) 252 */ 253#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 254#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ 255 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) 256 257/* 258 * Hash 
algorithms 259 * =============== 260 */ 261 262#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 263 264/* 265 * Toeplitz hash: 266 */ 267 268#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 269 270/* 271 * This algorithm uses a 'key' as well as the data buffer itself. 272 * (Buffer[] and Key[] are treated as shift-registers where the MSB of 273 * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1] 274 * is the 'right-most'). 275 * 276 * Value = 0 277 * For number of bits in Buffer[] 278 * If (left-most bit of Buffer[] is 1) 279 * Value ^= left-most 32 bits of Key[] 280 * Key[] << 1 281 * Buffer[] << 1 282 * 283 * The code below is provided for convenience where an operating system 284 * does not already provide an implementation. 285 */ 286#ifdef XEN_NETIF_DEFINE_TOEPLITZ 287static uint32_t xen_netif_toeplitz_hash(const uint8_t *key, 288 unsigned int keylen, 289 const uint8_t *buf, unsigned int buflen) 290{ 291 unsigned int keyi, bufi; 292 uint64_t prefix = 0; 293 uint64_t hash = 0; 294 295 /* Pre-load prefix with the first 8 bytes of the key */ 296 for (keyi = 0; keyi < 8; keyi++) { 297 prefix <<= 8; 298 prefix |= (keyi < keylen) ? key[keyi] : 0; 299 } 300 301 for (bufi = 0; bufi < buflen; bufi++) { 302 uint8_t byte = buf[bufi]; 303 unsigned int bit; 304 305 for (bit = 0; bit < 8; bit++) { 306 if (byte & 0x80) 307 hash ^= prefix; 308 prefix <<= 1; 309 byte <<= 1; 310 } 311 312 /* 313 * 'prefix' has now been left-shifted by 8, so 314 * OR in the next byte. 315 */ 316 prefix |= (keyi < keylen) ? key[keyi] : 0; 317 keyi++; 318 } 319 320 /* The valid part of the hash is in the upper 32 bits. 
*/ 321 return hash >> 32; 322} 323#endif /* XEN_NETIF_DEFINE_TOEPLITZ */ 324 325/* 326 * Control requests (struct xen_netif_ctrl_request) 327 * ================================================ 328 * 329 * All requests have the following format: 330 * 331 * 0 1 2 3 4 5 6 7 octet 332 * +-----+-----+-----+-----+-----+-----+-----+-----+ 333 * | id | type | data[0] | 334 * +-----+-----+-----+-----+-----+-----+-----+-----+ 335 * | data[1] | data[2] | 336 * +-----+-----+-----+-----+-----------------------+ 337 * 338 * id: the request identifier, echoed in response. 339 * type: the type of request (see below) 340 * data[]: any data associated with the request (determined by type) 341 */ 342 343struct xen_netif_ctrl_request { 344 uint16_t id; 345 uint16_t type; 346 347#define XEN_NETIF_CTRL_TYPE_INVALID 0 348#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 349#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 350#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 351#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 352#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 353#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 354#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 355 356 uint32_t data[3]; 357}; 358 359/* 360 * Control responses (struct xen_netif_ctrl_response) 361 * ================================================== 362 * 363 * All responses have the following format: 364 * 365 * 0 1 2 3 4 5 6 7 octet 366 * +-----+-----+-----+-----+-----+-----+-----+-----+ 367 * | id | type | status | 368 * +-----+-----+-----+-----+-----+-----+-----+-----+ 369 * | data | 370 * +-----+-----+-----+-----+ 371 * 372 * id: the corresponding request identifier 373 * type: the type of the corresponding request 374 * status: the status of request processing 375 * data: any data associated with the response (determined by type and 376 * status) 377 */ 378 379struct xen_netif_ctrl_response { 380 uint16_t id; 381 uint16_t type; 382 uint32_t status; 383 384#define XEN_NETIF_CTRL_STATUS_SUCCESS 0 385#define 
XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 386#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 387#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 388 389 uint32_t data; 390}; 391 392/* 393 * Control messages 394 * ================ 395 * 396 * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 397 * -------------------------------------- 398 * 399 * This is sent by the frontend to set the desired hash algorithm. 400 * 401 * Request: 402 * 403 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 404 * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value 405 * data[1] = 0 406 * data[2] = 0 407 * 408 * Response: 409 * 410 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 411 * supported 412 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not 413 * supported 414 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 415 * 416 * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables 417 * hashing and the backend is free to choose how it steers packets 418 * to queues (which is the default behaviour). 419 * 420 * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 421 * ---------------------------------- 422 * 423 * This is sent by the frontend to query the types of hash supported by 424 * the backend. 425 * 426 * Request: 427 * 428 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 429 * data[0] = 0 430 * data[1] = 0 431 * data[2] = 0 432 * 433 * Response: 434 * 435 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 436 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 437 * data = supported hash types (if operation was successful) 438 * 439 * NOTE: A valid hash algorithm must be selected before this operation can 440 * succeed. 441 * 442 * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 443 * ---------------------------------- 444 * 445 * This is sent by the frontend to set the types of hash that the backend 446 * should calculate. (See above for hash type definitions). 447 * Note that the 'maximal' type of hash should always be chosen. 
For 448 * example, if the frontend sets both IPV4 and IPV4_TCP hash types then 449 * the latter hash type should be calculated for any TCP packet and the 450 * former only calculated for non-TCP packets. 451 * 452 * Request: 453 * 454 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 455 * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values 456 * data[1] = 0 457 * data[2] = 0 458 * 459 * Response: 460 * 461 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 462 * supported 463 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag 464 * value is invalid or 465 * unsupported 466 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 467 * data = 0 468 * 469 * NOTE: A valid hash algorithm must be selected before this operation can 470 * succeed. 471 * Also, setting data[0] to zero disables hashing and the backend 472 * is free to choose how it steers packets to queues. 473 * 474 * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 475 * -------------------------------- 476 * 477 * This is sent by the frontend to set the key of the hash if the algorithm 478 * requires it. (See hash algorithms above). 
479 * 480 * Request: 481 * 482 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 483 * data[0] = grant reference of page containing the key (assumed to 484 * start at beginning of grant) 485 * data[1] = size of key in octets 486 * data[2] = 0 487 * 488 * Response: 489 * 490 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 491 * supported 492 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid 493 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger 494 * than the backend 495 * supports 496 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 497 * data = 0 498 * 499 * NOTE: Any key octets not specified are assumed to be zero (the key 500 * is assumed to be empty by default) and specifying a new key 501 * invalidates any previous key, hence specifying a key size of 502 * zero will clear the key (which ensures that the calculated hash 503 * will always be zero). 504 * The maximum size of key is algorithm and backend specific, but 505 * is also limited by the single grant reference. 506 * The grant reference may be read-only and must remain valid until 507 * the response has been processed. 508 * 509 * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 510 * ----------------------------------------- 511 * 512 * This is sent by the frontend to query the maximum size of mapping 513 * table supported by the backend. The size is specified in terms of 514 * table entries. 515 * 516 * Request: 517 * 518 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 519 * data[0] = 0 520 * data[1] = 0 521 * data[2] = 0 522 * 523 * Response: 524 * 525 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 526 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 527 * data = maximum number of entries allowed in the mapping table 528 * (if operation was successful) or zero if a mapping table is 529 * not supported (i.e. hash mapping is done only by modular 530 * arithmetic). 
XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
 * -----------------------------------------
567 * 568 * Request: 569 * 570 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 571 * data[0] = grant reference of page containing the mapping (sub-)table 572 * (assumed to start at beginning of grant) 573 * data[1] = size of (sub-)table in entries 574 * data[2] = offset, in entries, of sub-table within overall table 575 * 576 * Response: 577 * 578 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 579 * supported 580 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content 581 * is invalid 582 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger 583 * than the backend 584 * supports 585 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 586 * data = 0 587 * 588 * NOTE: The overall table has the following format: 589 * 590 * 0 1 2 3 4 5 6 7 octet 591 * +-----+-----+-----+-----+-----+-----+-----+-----+ 592 * | mapping[0] | mapping[1] | 593 * +-----+-----+-----+-----+-----+-----+-----+-----+ 594 * | . | 595 * | . | 596 * | . | 597 * +-----+-----+-----+-----+-----+-----+-----+-----+ 598 * | mapping[N-2] | mapping[N-1] | 599 * +-----+-----+-----+-----+-----+-----+-----+-----+ 600 * 601 * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 602 * message and each mapping must specifies a queue between 0 and 603 * "multi-queue-num-queues" (see above). 604 * The backend may support a mapping table larger than can be 605 * mapped by a single grant reference. Thus sub-tables within a 606 * larger table can be individually set by sending multiple messages 607 * with differing offset values. Specifying a new sub-table does not 608 * invalidate any table data outside that range. 609 * The grant reference may be read-only and must remain valid until 610 * the response has been processed. 
611 */ 612 613DEFINE_RING_TYPES(xen_netif_ctrl, 614 struct xen_netif_ctrl_request, 615 struct xen_netif_ctrl_response); 616 617/* 618 * Guest transmit 619 * ============== 620 * 621 * This is the 'wire' format for transmit (frontend -> backend) packets: 622 * 623 * Fragment 1: xen_netif_tx_request_t - flags = XEN_NETTXF_* 624 * size = total packet size 625 * [Extra 1: xen_netif_extra_info_t] - (only if fragment 1 flags include 626 * XEN_NETTXF_extra_info) 627 * ... 628 * [Extra N: xen_netif_extra_info_t] - (only if extra N-1 flags include 629 * XEN_NETIF_EXTRA_MORE) 630 * ... 631 * Fragment N: xen_netif_tx_request_t - (only if fragment N-1 flags include 632 * XEN_NETTXF_more_data - flags on preceding 633 * extras are not relevant here) 634 * flags = 0 635 * size = fragment size 636 * 637 * NOTE: 638 * 639 * This format slightly is different from that used for receive 640 * (backend -> frontend) packets. Specifically, in a multi-fragment 641 * packet the actual size of fragment 1 can only be determined by 642 * subtracting the sizes of fragments 2..N from the total packet size. 643 * 644 * Ring slot size is 12 octets, however not all request/response 645 * structs use the full size. 646 * 647 * tx request data (xen_netif_tx_request_t) 648 * ------------------------------------ 649 * 650 * 0 1 2 3 4 5 6 7 octet 651 * +-----+-----+-----+-----+-----+-----+-----+-----+ 652 * | grant ref | offset | flags | 653 * +-----+-----+-----+-----+-----+-----+-----+-----+ 654 * | id | size | 655 * +-----+-----+-----+-----+ 656 * 657 * grant ref: Reference to buffer page. 658 * offset: Offset within buffer page. 659 * flags: XEN_NETTXF_*. 660 * id: request identifier, echoed in response. 661 * size: packet size in bytes. 
662 * 663 * tx response (xen_netif_tx_response_t) 664 * --------------------------------- 665 * 666 * 0 1 2 3 4 5 6 7 octet 667 * +-----+-----+-----+-----+-----+-----+-----+-----+ 668 * | id | status | unused | 669 * +-----+-----+-----+-----+-----+-----+-----+-----+ 670 * | unused | 671 * +-----+-----+-----+-----+ 672 * 673 * id: reflects id in transmit request 674 * status: XEN_NETIF_RSP_* 675 * 676 * Guest receive 677 * ============= 678 * 679 * This is the 'wire' format for receive (backend -> frontend) packets: 680 * 681 * Fragment 1: xen_netif_rx_request_t - flags = XEN_NETRXF_* 682 * size = fragment size 683 * [Extra 1: xen_netif_extra_info_t] - (only if fragment 1 flags include 684 * XEN_NETRXF_extra_info) 685 * ... 686 * [Extra N: xen_netif_extra_info_t] - (only if extra N-1 flags include 687 * XEN_NETIF_EXTRA_MORE) 688 * ... 689 * Fragment N: xen_netif_rx_request_t - (only if fragment N-1 flags include 690 * XEN_NETRXF_more_data - flags on preceding 691 * extras are not relevant here) 692 * flags = 0 693 * size = fragment size 694 * 695 * NOTE: 696 * 697 * This format slightly is different from that used for transmit 698 * (frontend -> backend) packets. Specifically, in a multi-fragment 699 * packet the size of the packet can only be determined by summing the 700 * sizes of fragments 1..N. 701 * 702 * Ring slot size is 8 octets. 703 * 704 * rx request (xen_netif_rx_request_t) 705 * ------------------------------- 706 * 707 * 0 1 2 3 4 5 6 7 octet 708 * +-----+-----+-----+-----+-----+-----+-----+-----+ 709 * | id | pad | gref | 710 * +-----+-----+-----+-----+-----+-----+-----+-----+ 711 * 712 * id: request identifier, echoed in response. 713 * gref: reference to incoming granted frame. 
714 * 715 * rx response (xen_netif_rx_response_t) 716 * --------------------------------- 717 * 718 * 0 1 2 3 4 5 6 7 octet 719 * +-----+-----+-----+-----+-----+-----+-----+-----+ 720 * | id | offset | flags | status | 721 * +-----+-----+-----+-----+-----+-----+-----+-----+ 722 * 723 * id: reflects id in receive request 724 * offset: offset in page of start of received packet 725 * flags: XEN_NETRXF_* 726 * status: -ve: XEN_NETIF_RSP_*; +ve: Rx'ed pkt size. 727 * 728 * NOTE: Historically, to support GSO on the frontend receive side, Linux 729 * netfront does not make use of the rx response id (because, as 730 * described below, extra info structures overlay the id field). 731 * Instead it assumes that responses always appear in the same ring 732 * slot as their corresponding request. Thus, to maintain 733 * compatibility, backends must make sure this is the case. 734 * 735 * Extra Info 736 * ========== 737 * 738 * Can be present if initial request or response has NET{T,R}XF_extra_info, 739 * or previous extra request has XEN_NETIF_EXTRA_MORE. 740 * 741 * The struct therefore needs to fit into either a tx or rx slot and 742 * is therefore limited to 8 octets. 743 * 744 * NOTE: Because extra info data overlays the usual request/response 745 * structures, there is no id information in the opposite direction. 746 * So, if an extra info overlays an rx response the frontend can 747 * assume that it is in the same ring slot as the request that was 748 * consumed to make the slot available, and the backend must ensure 749 * this assumption is true. 
750 * 751 * extra info (xen_netif_extra_info_t) 752 * ------------------------------- 753 * 754 * General format: 755 * 756 * 0 1 2 3 4 5 6 7 octet 757 * +-----+-----+-----+-----+-----+-----+-----+-----+ 758 * |type |flags| type specific data | 759 * +-----+-----+-----+-----+-----+-----+-----+-----+ 760 * | padding for tx | 761 * +-----+-----+-----+-----+ 762 * 763 * type: XEN_NETIF_EXTRA_TYPE_* 764 * flags: XEN_NETIF_EXTRA_FLAG_* 765 * padding for tx: present only in the tx case due to 8 octet limit 766 * from rx case. Not shown in type specific entries 767 * below. 768 * 769 * XEN_NETIF_EXTRA_TYPE_GSO: 770 * 771 * 0 1 2 3 4 5 6 7 octet 772 * +-----+-----+-----+-----+-----+-----+-----+-----+ 773 * |type |flags| size |type | pad | features | 774 * +-----+-----+-----+-----+-----+-----+-----+-----+ 775 * 776 * type: Must be XEN_NETIF_EXTRA_TYPE_GSO 777 * flags: XEN_NETIF_EXTRA_FLAG_* 778 * size: Maximum payload size of each segment. For example, 779 * for TCP this is just the path MSS. 780 * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of 781 * the packet and any extra features required to segment the 782 * packet properly. 783 * features: EN_XEN_NETIF_GSO_FEAT_*: This specifies any extra GSO 784 * features required to process this packet, such as ECN 785 * support for TCPv4. 786 * 787 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: 788 * 789 * 0 1 2 3 4 5 6 7 octet 790 * +-----+-----+-----+-----+-----+-----+-----+-----+ 791 * |type |flags| addr | 792 * +-----+-----+-----+-----+-----+-----+-----+-----+ 793 * 794 * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} 795 * flags: XEN_NETIF_EXTRA_FLAG_* 796 * addr: address to add/remove 797 * 798 * XEN_NETIF_EXTRA_TYPE_HASH: 799 * 800 * A backend that supports teoplitz hashing is assumed to accept 801 * this type of extra info in transmit packets. 802 * A frontend that enables hashing is assumed to accept 803 * this type of extra info in receive packets. 
804 * 805 * 0 1 2 3 4 5 6 7 octet 806 * +-----+-----+-----+-----+-----+-----+-----+-----+ 807 * |type |flags|htype| alg |LSB ---- value ---- MSB| 808 * +-----+-----+-----+-----+-----+-----+-----+-----+ 809 * 810 * type: Must be XEN_NETIF_EXTRA_TYPE_HASH 811 * flags: XEN_NETIF_EXTRA_FLAG_* 812 * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) 813 * alg: The algorithm used to calculate the hash (one of 814 * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) 815 * value: Hash value 816 */ 817 818/* Protocol checksum field is blank in the packet (hardware offload)? */ 819#define _XEN_NETTXF_csum_blank (0) 820#define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank) 821 822/* Packet data has been validated against protocol checksum. */ 823#define _XEN_NETTXF_data_validated (1) 824#define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated) 825 826/* Packet continues in the next request descriptor. */ 827#define _XEN_NETTXF_more_data (2) 828#define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data) 829 830/* Packet to be followed by extra descriptor(s). */ 831#define _XEN_NETTXF_extra_info (3) 832#define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) 833 834#define XEN_NETIF_MAX_TX_SIZE 0xFFFF 835struct xen_netif_tx_request { 836 grant_ref_t gref; 837 uint16_t offset; 838 uint16_t flags; 839 uint16_t id; 840 uint16_t size; 841}; 842 843/* Types of xen_netif_extra_info descriptors. */ 844#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ 845#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ 846#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ 847#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ 848#define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ 849#define XEN_NETIF_EXTRA_TYPE_MAX (5) 850 851/* xen_netif_extra_info_t flags. 
*/ 852#define _XEN_NETIF_EXTRA_FLAG_MORE (0) 853#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) 854 855/* GSO types */ 856#define XEN_NETIF_GSO_TYPE_NONE (0) 857#define XEN_NETIF_GSO_TYPE_TCPV4 (1) 858#define XEN_NETIF_GSO_TYPE_TCPV6 (2) 859 860/* 861 * This structure needs to fit within both xen_netif_tx_request_t and 862 * xen_netif_rx_response_t for compatibility. 863 */ 864struct xen_netif_extra_info { 865 uint8_t type; 866 uint8_t flags; 867 union { 868 struct { 869 uint16_t size; 870 uint8_t type; 871 uint8_t pad; 872 uint16_t features; 873 } gso; 874 struct { 875 uint8_t addr[6]; 876 } mcast; 877 struct { 878 uint8_t type; 879 uint8_t algorithm; 880 uint8_t value[4]; 881 } hash; 882 uint16_t pad[3]; 883 } u; 884}; 885 886struct xen_netif_tx_response { 887 uint16_t id; 888 int16_t status; 889}; 890 891struct xen_netif_rx_request { 892 uint16_t id; /* Echoed in response message. */ 893 uint16_t pad; 894 grant_ref_t gref; 895}; 896 897/* Packet data has been validated against protocol checksum. */ 898#define _XEN_NETRXF_data_validated (0) 899#define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated) 900 901/* Protocol checksum field is blank in the packet (hardware offload)? */ 902#define _XEN_NETRXF_csum_blank (1) 903#define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank) 904 905/* Packet continues in the next request descriptor. */ 906#define _XEN_NETRXF_more_data (2) 907#define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data) 908 909/* Packet to be followed by extra descriptor(s). */ 910#define _XEN_NETRXF_extra_info (3) 911#define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info) 912 913/* Packet has GSO prefix. 
Deprecated but included for compatibility */ 914#define _XEN_NETRXF_gso_prefix (4) 915#define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix) 916 917struct xen_netif_rx_response { 918 uint16_t id; 919 uint16_t offset; 920 uint16_t flags; 921 int16_t status; 922}; 923 924/* 925 * Generate xen_netif ring structures and types. 926 */ 927 928DEFINE_RING_TYPES(xen_netif_tx, struct xen_netif_tx_request, 929 struct xen_netif_tx_response); 930DEFINE_RING_TYPES(xen_netif_rx, struct xen_netif_rx_request, 931 struct xen_netif_rx_response); 932 933#define XEN_NETIF_RSP_DROPPED -2 934#define XEN_NETIF_RSP_ERROR -1 935#define XEN_NETIF_RSP_OKAY 0 936/* No response: used for auxiliary requests (e.g., xen_netif_extra_info_t). */ 937#define XEN_NETIF_RSP_NULL 1 938 939#endif 940