linux/include/linux/scif.h
<<
>>
Prefs
   1/*
   2 * Intel MIC Platform Software Stack (MPSS)
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 * Copyright(c) 2014 Intel Corporation.
  10 *
  11 * This program is free software; you can redistribute it and/or modify
  12 * it under the terms of version 2 of the GNU General Public License as
  13 * published by the Free Software Foundation.
  14 *
  15 * This program is distributed in the hope that it will be useful, but
  16 * WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * General Public License for more details.
  19 *
  20 * BSD LICENSE
  21 *
  22 * Copyright(c) 2014 Intel Corporation.
  23 *
  24 * Redistribution and use in source and binary forms, with or without
  25 * modification, are permitted provided that the following conditions
  26 * are met:
  27 *
  28 * * Redistributions of source code must retain the above copyright
  29 *   notice, this list of conditions and the following disclaimer.
  30 * * Redistributions in binary form must reproduce the above copyright
  31 *   notice, this list of conditions and the following disclaimer in
  32 *   the documentation and/or other materials provided with the
  33 *   distribution.
  34 * * Neither the name of Intel Corporation nor the names of its
  35 *   contributors may be used to endorse or promote products derived
  36 *   from this software without specific prior written permission.
  37 *
  38 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  39 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  40 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  41 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  42 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  45 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  46 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  47 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  48 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  49 *
  50 * Intel SCIF driver.
  51 *
  52 */
  53#ifndef __SCIF_H__
  54#define __SCIF_H__
  55
  56#include <linux/types.h>
  57#include <linux/poll.h>
  58#include <linux/device.h>
  59#include <linux/scif_ioctl.h>
  60
  61#define SCIF_ACCEPT_SYNC        1 /* scif_accept() flag: block until a connection request is presented */
  62#define SCIF_SEND_BLOCK         1 /* scif_send() flag: block until the entire message is sent */
  63#define SCIF_RECV_BLOCK         1 /* scif_recv() flag: block until the entire message is received */
  64
  65enum { /* window protection flags, OR'ed together to form prot_flags of scif_register() */
  66        SCIF_PROT_READ = (1 << 0), /* allow read operations from the window */
  67        SCIF_PROT_WRITE = (1 << 1) /* allow write operations to the window */
  68};
  69
  70enum { /* mapping flags; SCIF_MAP_FIXED is documented for the map_flags argument of scif_register() */
  71        SCIF_MAP_FIXED = 0x10, /* place the window at exactly the requested offset; never replaces an existing window */
  72        SCIF_MAP_KERNEL = 0x20, /* NOTE(review): not described in the visible documentation - confirm meaning */
  73};
  74
  75enum { /* fence/signal flag bits; NOTE(review): presumably used with scif_fence_mark() and scif_fence_signal() - confirm */
  76        SCIF_FENCE_INIT_SELF = (1 << 0),
  77        SCIF_FENCE_INIT_PEER = (1 << 1),
  78        SCIF_SIGNAL_LOCAL = (1 << 4), /* note: bits 2 and 3 are not assigned in this enum */
  79        SCIF_SIGNAL_REMOTE = (1 << 5)
  80};
  81
  82enum { /* transfer mode flags, OR'ed together to form rma_flags of scif_readfrom()/scif_writeto() */
  83        SCIF_RMA_USECPU = (1 << 0), /* perform the transfer using the CPU, otherwise use the DMA engine */
  84        SCIF_RMA_USECACHE = (1 << 1), /* NOTE(review): not described in the visible documentation - confirm meaning */
  85        SCIF_RMA_SYNC = (1 << 2), /* perform the transfer synchronously, returning after it has completed */
  86        SCIF_RMA_ORDERED = (1 << 3) /* last (partial) cacheline becomes visible after all other transferred data */
  87};
  88
  89/* End of SCIF Admin Reserved Ports: scif_bind() documents ports less than 1024 as bindable only by privileged processes */
  90#define SCIF_ADMIN_PORT_END     1024
  91
  92/* End of SCIF Reserved Ports: scif_bind() with pn == 0 assigns a port greater than or equal to this value */
  93#define SCIF_PORT_RSVD          1088
  94
  95typedef struct scif_endpt *scif_epd_t; /* endpoint descriptor passed to the scif_*() calls; note it is a pointer type */
  96typedef struct scif_pinned_pages *scif_pinned_pages_t; /* pointer to struct scif_pinned_pages; NOTE(review): users are not visible in this part of the header */
  97
  98/**
  99 * struct scif_range - SCIF registered range used in kernel mode
 100 * @cookie: cookie used internally by SCIF
 101 * @nr_pages: number of PAGE_SIZE pages in the range
 102 * @prot_flags: R/W protection flags
 103 * @phys_addr: array of bus addresses
 104 * @va: array of kernel virtual addresses backed by the pages in the phys_addr
 105 *      array. The va is populated only when called on the host for a remote
 106 *      SCIF connection on MIC. This is required to support the use case of DMA
 107 *      between MIC and another device which is not a SCIF node, e.g. an IB or
 108 *      Ethernet NIC.
 109 */
 110struct scif_range {
 111        void *cookie;
 112        int nr_pages;
 113        int prot_flags;
 114        dma_addr_t *phys_addr;
 115        void __iomem **va;
 116};
 117
 118/**
 119 * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll()
 120 * @epd: SCIF endpoint
 121 * @events: requested events
 122 * @revents: returned events
 123 */
 124struct scif_pollepd {
 125        scif_epd_t epd;
 126        __poll_t events;
 127        __poll_t revents;
 128};
 129
 130/**
 131 * struct scif_peer_dev - representation of a peer SCIF device
 132 *
 133 * Peer devices show up as PCIe devices for the mgmt node but not the cards.
 134 * The mgmt node discovers all the cards on the PCIe bus and informs the other
 135 * cards about their peers. Upon notification of a peer a node adds a peer
 136 * device to the peer bus to maintain symmetry in the way devices are
 137 * discovered across all nodes in the SCIF network.
 138 *
 139 * @dev: underlying device
 140 * @dnode: the destination node which this device will communicate with
 141 */
 142struct scif_peer_dev {
 143        struct device dev;
 144        u8 dnode;
 145};
 146
 147/**
 148 * struct scif_client - representation of a SCIF client
 149 * @name: client name
 150 * @probe: client method called when a peer device is registered
 151 * @remove: client method called when a peer device is unregistered
 152 * @si: subsys_interface used internally for implementing SCIF clients
 153 */
 154struct scif_client {
 155        const char *name;
 156        void (*probe)(struct scif_peer_dev *spdev);
 157        void (*remove)(struct scif_peer_dev *spdev);
 158        struct subsys_interface si;
 159};
 160
 161#define SCIF_OPEN_FAILED ((scif_epd_t)-1) /* returned by scif_open() on failure in user mode */
 162#define SCIF_REGISTER_FAILED ((off_t)-1) /* returned by scif_register() on failure in user mode */
 163#define SCIF_MMAP_FAILED ((void *)-1) /* NOTE(review): the call returning this is not visible in this part of the header */
 164
 165/**
 166 * scif_open() - Create an endpoint
 167 *
 168 * Return:
 169 * Upon successful completion, scif_open() returns an endpoint descriptor to
 170 * be used in subsequent SCIF function calls to refer to that endpoint;
 171 * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
 172 * returned and errno is set to indicate the error; in kernel mode a NULL
 173 * scif_epd_t is returned.
 174 *
 175 * Errors:
 176 * ENOMEM - Insufficient kernel memory was available
 177 */
 178scif_epd_t scif_open(void);
 179
 180/**
 181 * scif_bind() - Bind an endpoint to a port
 182 * @epd:        endpoint descriptor
 183 * @pn:         port number
 184 *
 185 * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
 186 * local node. If pn is zero, a port number greater than or equal to
 187 * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
 188 * exactly one local port. Ports less than 1024 when requested can only be bound
 189 * by system (or root) processes or by processes executed by privileged users.
 190 *
 191 * Return:
 192 * Upon successful completion, scif_bind() returns the port number to which epd
 193 * is bound; otherwise in user mode -1 is returned and errno is set to
 194 * indicate the error; in kernel mode the negative of one of the following
 195 * errors is returned.
 196 *
 197 * Errors:
 198 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 199 * EINVAL - the endpoint or the port is already bound
 200 * EISCONN - The endpoint is already connected
 201 * ENOSPC - No port number available for assignment
 202 * EACCES - The port requested is protected and the user is not the superuser
 203 */
 204int scif_bind(scif_epd_t epd, u16 pn);
 205
 206/**
 207 * scif_listen() - Listen for connections on an endpoint
 208 * @epd:        endpoint descriptor
 209 * @backlog:    maximum pending connection requests
 210 *
 211 * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
 212 * an endpoint that will be used to accept incoming connection requests. Once
 213 * so marked, the endpoint is said to be in the listening state and may not be
 214 * used as the endpoint of a connection.
 215 *
 216 * The endpoint, epd, must have been bound to a port.
 217 *
 218 * The backlog argument defines the maximum length to which the queue of
 219 * pending connections for epd may grow. If a connection request arrives when
 220 * the queue is full, the client may receive an error with an indication that
 221 * the connection was refused.
 222 *
 223 * Return:
 224 * Upon successful completion, scif_listen() returns 0; otherwise in user mode
 225 * -1 is returned and errno is set to indicate the error; in kernel mode the
 226 * negative of one of the following errors is returned.
 227 *
 228 * Errors:
 229 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 230 * EINVAL - the endpoint is not bound to a port
 231 * EISCONN - The endpoint is already connected or listening
 232 */
 233int scif_listen(scif_epd_t epd, int backlog);
 234
 235/**
 236 * scif_connect() - Initiate a connection on a port
 237 * @epd:        endpoint descriptor
 238 * @dst:        global id of port to which to connect
 239 *
 240 * The scif_connect() function requests the connection of endpoint epd to remote
 241 * port dst. If the connection is successful, a peer endpoint, bound to dst, is
 242 * created on node dst.node. On successful return, the connection is complete.
 243 *
 244 * If the endpoint epd has not already been bound to a port, scif_connect()
 245 * will bind it to an unused local port.
 246 *
 247 * A connection is terminated when an endpoint of the connection is closed,
 248 * either explicitly by scif_close(), or when a process that owns one of the
 249 * endpoints of the connection is terminated.
 250 *
 251 * In user space, scif_connect() supports an asynchronous connection mode
 252 * if the application has set the O_NONBLOCK flag on the endpoint via the
 253 * fcntl() system call. Setting this flag will result in the calling process
 254 * not waiting during scif_connect().
 255 *
 256 * Return:
 257 * Upon successful completion, scif_connect() returns the port ID to which the
 258 * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
 259 * set to indicate the error; in kernel mode the negative of one of the
 260 * following errors is returned.
 261 *
 262 * Errors:
 263 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 264 * ECONNREFUSED - The destination was not listening for connections or refused
 265 * the connection request
 266 * EINVAL - dst.port is not a valid port ID
 267 * EISCONN - The endpoint is already connected
 268 * ENOMEM - No buffer space is available
 269 * ENODEV - The destination node does not exist, or the node is lost or existed,
 270 * but is not currently in the network since it may have crashed
 271 * ENOSPC - No port number available for assignment
 272 * EOPNOTSUPP - The endpoint is listening and cannot be connected
 273 */
 274int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
 275
 276/**
 277 * scif_accept() - Accept a connection on an endpoint
 278 * @epd:        endpoint descriptor
 279 * @peer:       global id of port to which connected
 280 * @newepd:     new connected endpoint descriptor
 281 * @flags:      flags
 282 *
 283 * The scif_accept() call extracts the first connection request from the queue
 284 * of pending connections for the port on which epd is listening. scif_accept()
 285 * creates a new endpoint, bound to the same port as epd, and allocates a new
 286 * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
 287 * endpoint is connected to the endpoint through which the connection was
 288 * requested. epd is unaffected by this call, and remains in the listening
 289 * state.
 290 *
 291 * On successful return, peer holds the global port identifier (node id and
 292 * local port number) of the port which requested the connection.
 293 *
 294 * A connection is terminated when an endpoint of the connection is closed,
 295 * either explicitly by scif_close(), or when a process that owns one of the
 296 * endpoints of the connection is terminated.
 297 *
 298 * The number of connections that can (subsequently) be accepted on epd is only
 299 * limited by system resources (memory).
 300 *
 301 * The flags argument is formed by OR'ing together zero or more of the
 302 * following values.
 303 * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
 304 *                      SCIF_ACCEPT_SYNC is not in flags, and no pending
 305 *                      connections are present on the queue, scif_accept()
 306 *                      fails with an EAGAIN error
 307 *
 308 * In user mode, the select() and poll() functions can be used to determine
 309 * when there is a connection request. In kernel mode, the scif_poll()
 310 * function may be used for this purpose. A readable event will be delivered
 311 * when a connection is requested.
 312 *
 313 * Return:
 314 * Upon successful completion, scif_accept() returns 0; otherwise in user mode
 315 * -1 is returned and errno is set to indicate the error; in kernel mode the
 316 *      negative of one of the following errors is returned.
 317 *
 318 * Errors:
 319 * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
 320 * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
 321 * its connection request
 322 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 323 * EINTR - Interrupted function
 324 * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
 325 * NULL, or newepd is NULL
 326 * ENODEV - The requesting node is lost or existed, but is not currently in the
 327 * network since it may have crashed
 328 * ENOMEM - Not enough space
 329 * ENOENT - Secondary part of epd registration failed
 330 */
 331int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
 332                *newepd, int flags);
 333
 334/**
 335 * scif_close() - Close an endpoint
 336 * @epd:        endpoint descriptor
 337 *
 338 * scif_close() closes an endpoint and performs necessary teardown of
 339 * facilities associated with that endpoint.
 340 *
 341 * If epd is a listening endpoint then it will no longer accept connection
 342 * requests on the port to which it is bound. Any pending connection requests
 343 * are rejected.
 344 *
 345 * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
 346 * which are in-process through epd or its peer endpoint will complete before
 347 * scif_close() returns. Registered windows of the local and peer endpoints are
 348 * released as if scif_unregister() was called against each window.
 349 *
 350 * Closing a SCIF endpoint does not affect local registered memory mapped by
 351 * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
 352 * SCIF endpoint until explicitly removed by the peer calling munmap(..).
 353 *
 354 * If the peer endpoint's receive queue is not empty at the time that epd is
 355 * closed, then the peer endpoint can be passed as the endpoint parameter to
 356 * scif_recv() until the receive queue is empty.
 357 *
 358 * epd is freed and may no longer be accessed.
 359 *
 360 * Return:
 361 * Upon successful completion, scif_close() returns 0; otherwise in user mode
 362 * -1 is returned and errno is set to indicate the error; in kernel mode the
 363 * negative of one of the following errors is returned.
 364 *
 365 * Errors:
 366 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 367 */
 368int scif_close(scif_epd_t epd);
 369
 370/**
 371 * scif_send() - Send a message
 372 * @epd:        endpoint descriptor
 373 * @msg:        message buffer address
 374 * @len:        message length
 375 * @flags:      blocking mode flags
 376 *
 377 * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
 378 * are copied from memory starting at address msg. On successful execution the
 379 * return value of scif_send() is the number of bytes that were sent, and is
 380 * zero if no bytes were sent because len was zero. scif_send() may be called
 381 * only when the endpoint is in a connected state.
 382 *
 383 * If a scif_send() call is non-blocking, then it sends only those bytes which
 384 * can be sent without waiting, up to a maximum of len bytes.
 385 *
 386 * If a scif_send() call is blocking, then it normally returns after sending
 387 * all len bytes. If a blocking call is interrupted or the connection is
 388 * reset, the call is considered successful if some bytes were sent or len is
 389 * zero, otherwise the call is considered unsuccessful.
 390 *
 391 * In user mode, the select() and poll() functions can be used to determine
 392 * when the send queue is not full. In kernel mode, the scif_poll() function
 393 * may be used for this purpose.
 394 *
 395 * It is recommended that scif_send()/scif_recv() only be used for short
 396 * control-type message communication between SCIF endpoints. The SCIF RMA
 397 * APIs are expected to provide better performance for transfer sizes of
 398 * 1024 bytes or longer for the current MIC hardware and software
 399 * implementation.
 400 *
 401 * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
 402 * is passed as the flags argument.
 403 *
 404 * Return:
 405 * Upon successful completion, scif_send() returns the number of bytes sent;
 406 * otherwise in user mode -1 is returned and errno is set to indicate the
 407 * error; in kernel mode the negative of one of the following errors is
 408 * returned.
 409 *
 410 * Errors:
 411 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 412 * ECONNRESET - Connection reset by peer
 413 * EINVAL - flags is invalid, or len is negative
 414 * ENODEV - The remote node is lost or existed, but is not currently in the
 415 * network since it may have crashed
 416 * ENOMEM - Not enough space
 417 * ENOTCONN - The endpoint is not connected
 418 */
 419int scif_send(scif_epd_t epd, void *msg, int len, int flags);
 420
 421/**
 422 * scif_recv() - Receive a message
 423 * @epd:        endpoint descriptor
 424 * @msg:        message buffer address
 425 * @len:        message buffer length
 426 * @flags:      blocking mode flags
 427 *
 428 * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
 429 * data are copied to memory starting at address msg. On successful execution
 430 * the return value of scif_recv() is the number of bytes that were received,
 431 * and is zero if no bytes were received because len was zero. scif_recv() may
 432 * be called only when the endpoint is in a connected state.
 433 *
 434 * If a scif_recv() call is non-blocking, then it receives only those bytes
 435 * which can be received without waiting, up to a maximum of len bytes.
 436 *
 437 * If a scif_recv() call is blocking, then it normally returns after receiving
 438 * all len bytes. If the blocking call was interrupted due to a disconnection,
 439 * subsequent calls to scif_recv() will copy all bytes received up to the point
 440 * of disconnection.
 441 *
 442 * In user mode, the select() and poll() functions can be used to determine
 443 * when data is available to be received. In kernel mode, the scif_poll()
 444 * function may be used for this purpose.
 445 *
 446 * It is recommended that scif_send()/scif_recv() only be used for short
 447 * control-type message communication between SCIF endpoints. The SCIF RMA
 448 * APIs are expected to provide better performance for transfer sizes of
 449 * 1024 bytes or longer for the current MIC hardware and software
 450 * implementation.
 451 *
 452 * scif_recv() will block until the entire message is received if
 453 * SCIF_RECV_BLOCK is passed as the flags argument.
 454 *
 455 * Return:
 456 * Upon successful completion, scif_recv() returns the number of bytes
 457 * received; otherwise in user mode -1 is returned and errno is set to
 458 * indicate the error; in kernel mode the negative of one of the following
 459 * errors is returned.
 460 *
 461 * Errors:
 462 * EAGAIN - The destination node is returning from a low power state
 463 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 464 * ECONNRESET - Connection reset by peer
 465 * EINVAL - flags is invalid, or len is negative
 466 * ENODEV - The remote node is lost or existed, but is not currently in the
 467 * network since it may have crashed
 468 * ENOMEM - Not enough space
 469 * ENOTCONN - The endpoint is not connected
 470 */
 471int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
 472
 473/**
 474 * scif_register() - Mark a memory region for remote access.
 475 * @epd:                endpoint descriptor
 476 * @addr:               starting virtual address
 477 * @len:                length of range
 478 * @offset:             offset of window
 479 * @prot_flags:         read/write protection flags
 480 * @map_flags:          mapping flags
 481 *
 482 * The scif_register() function opens a window, a range of whole pages of the
 483 * registered address space of the endpoint epd, starting at offset po and
 484 * continuing for len bytes. The value of po, further described below, is a
 485 * function of the parameters offset and len, and the value of map_flags. Each
 486 * page of the window represents the physical memory page which backs the
 487 * corresponding page of the range of virtual address pages starting at addr
 488 * and continuing for len bytes. addr and len are constrained to be multiples
 489 * of the page size. A successful scif_register() call returns po.
 490 *
 491 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
 492 * exactly, and offset is constrained to be a multiple of the page size. The
 493 * mapping established by scif_register() will not replace any existing
 494 * registration; an error is returned if any page within the range [offset,
 495 * offset + len - 1] intersects an existing window.
 496 *
 497 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
 498 * implementation-defined manner to arrive at po. The po value so chosen will
 499 * be an area of the registered address space that the implementation deems
 500 * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
 501 * granting the implementation complete freedom in selecting po, subject to
 502 * constraints described below. A non-zero value of offset is taken to be a
 503 * suggestion of an offset near which the mapping should be placed. When the
 504 * implementation selects a value for po, it does not replace any extant
 505 * window. In all cases, po will be a multiple of the page size.
 506 *
 507 * The physical pages which are so represented by a window are available for
 508 * access in calls to mmap(), scif_readfrom(), scif_writeto(),
 509 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
 510 * physical pages represented by the window will not be reused by the memory
 511 * subsystem for any other purpose. Note that the same physical page may be
 512 * represented by multiple windows.
 513 *
 514 * Subsequent operations which change the memory pages to which virtual
 515 * addresses are mapped (such as mmap(), munmap()) have no effect on
 516 * existing window.
 517 *
 518 * If the process will fork(), it is recommended that the registered
 519 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
 520 * problems due to copy-on-write semantics.
 521 *
 522 * The prot_flags argument is formed by OR'ing together one or more of the
 523 * following values.
 524 * SCIF_PROT_READ - allow read operations from the window
 525 * SCIF_PROT_WRITE - allow write operations to the window
 526 *
 527 * Return:
 528 * Upon successful completion, scif_register() returns the offset at which the
 529 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
 530 * is ((off_t)-1)) is returned and errno is set to indicate the error; in
 531 * kernel mode the negative of one of the following errors is returned.
 532 *
 533 * Errors:
 534 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
 535 * [offset, offset + len -1] are already registered
 536 * EAGAIN - The mapping could not be performed due to lack of resources
 537 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 538 * ECONNRESET - Connection reset by peer
 539 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
 540 * set in map_flags, and offset is not a multiple of the page size, or addr is
 541 * not a multiple of the page size, or len is not a multiple of the page size,
 542 * or is 0, or offset is negative
 543 * ENODEV - The remote node is lost or existed, but is not currently in the
 544 * network since it may have crashed
 545 * ENOMEM - Not enough space
 546 * ENOTCONN - The endpoint is not connected
 547 */
 548off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
 549                    int prot_flags, int map_flags);
 550
 551/**
 552 * scif_unregister() - Unregister a memory region marked for remote access.
 553 * @epd:        endpoint descriptor
 554 * @offset:     start of range to unregister
 555 * @len:        length of range to unregister
 556 *
 557 * The scif_unregister() function closes those previously registered windows
 558 * which are entirely within the range [offset, offset + len - 1]. It is an
 559 * error to specify a range which intersects only a subrange of a window.
 560 *
 561 * On a successful return, pages within the window may no longer be specified
 562 * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
 563 * scif_vwriteto(), scif_get_pages(), and scif_fence_signal(). The window,
 564 * however, continues to exist until all previous references against it are
 565 * removed. A window is referenced if there is a mapping to it created by
 566 * mmap(), or if scif_get_pages() was called against the window
 567 * (and the pages have not been returned via scif_put_pages()). A window is
 568 * also referenced while an RMA, in which some range of the window is a source
 569 * or destination, is in progress. Finally a window is referenced while some
 570 * offset in that window was specified to scif_fence_signal(), and the RMAs
 571 * marked by that call to scif_fence_signal() have not completed. While a
 572 * window is in this state, its registered address space pages are not
 573 * available for use in a new registered window.
 574 *
 575 * When all such references to the window have been removed, its references to
 576 * all the physical pages which it represents are removed. Similarly, the
 577 * registered address space pages of the window become available for
 578 * registration in a new window.
 579 *
 580 * Return:
 581 * Upon successful completion, scif_unregister() returns 0; otherwise in user
 582 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 583 * the negative of one of the following errors is returned. In the event of an
 584 * error, no windows are unregistered.
 585 *
 586 * Errors:
 587 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 588 * ECONNRESET - Connection reset by peer
 589 * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
 590 * window, or offset is negative
 591 * ENODEV - The remote node is lost or existed, but is not currently in the
 592 * network since it may have crashed
 593 * ENOTCONN - The endpoint is not connected
 594 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
 595 * registered address space of epd
 596 */
 597int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
 598
 599/**
 600 * scif_readfrom() - Copy from a remote address space
 601 * @epd:        endpoint descriptor
 602 * @loffset:    offset in local registered address space to
 603 *              which to copy
 604 * @len:        length of range to copy
 605 * @roffset:    offset in remote registered address space
 606 *              from which to copy
 607 * @rma_flags:  transfer mode flags
 608 *
 609 * scif_readfrom() copies len bytes from the remote registered address space of
 610 * the peer of endpoint epd, starting at the offset roffset to the local
 611 * registered address space of epd, starting at the offset loffset.
 612 *
 613 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
 614 * roffset + len - 1] must be within some registered window or windows of the
 615 * local and remote nodes. A range may intersect multiple registered windows,
 616 * but only if those windows are contiguous in the registered address space.
 617 *
 618 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 619 * programmed read/writes. Otherwise the data is copied using DMA. If rma_flags
 620 * includes SCIF_RMA_SYNC, then scif_readfrom() will return after the transfer
 621 * is complete. Otherwise, the transfer may be performed asynchronously. The
 622 * order in which any two asynchronous RMA operations complete is
 623 * non-deterministic. The synchronization functions, scif_fence_mark()/
 624 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 625 * the completion of asynchronous RMA operations on the same endpoint.
 626 *
 627 * The DMA transfer of individual bytes is not guaranteed to complete in
 628 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 629 * cacheline or partial cacheline of the source range will become visible on
 630 * the destination node after all other transferred data in the source
 631 * range has become visible on the destination node.
 632 *
 633 * The optimal DMA performance will likely be realized if both
 634 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
 635 * performance will likely be realized if loffset and roffset are not
 636 * cacheline aligned but are separated by some multiple of 64. The lowest level
 637 * of performance is likely if loffset and roffset are not separated by a
 638 * multiple of 64.
 639 *
 640 * The rma_flags argument is formed by ORing together zero or more of the
 641 * following values.
 642 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 643 *      engine.
 644 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 645 *              transfer has completed. Passing this flag results in the
 646 *              current implementation busy waiting and consuming CPU cycles
 647 *              while the DMA transfer is in progress for best performance by
 648 *              avoiding the interrupt latency.
 649 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 650 *              the source range becomes visible on the destination node
 651 *              after all other transferred data in the source range has
 652 *              become visible on the destination
 653 *
 654 * Return:
 655 * Upon successful completion, scif_readfrom() returns 0; otherwise in user
 656 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 657 * the negative of one of the following errors is returned.
 658 *
 659 * Errors:
 660 * EACCES - Attempt to write to a read-only range
 661 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 662 * ECONNRESET - Connection reset by peer
 663 * EINVAL - rma_flags is invalid
 664 * ENODEV - The remote node is lost or existed, but is not currently in the
 665 * network since it may have crashed
 666 * ENOTCONN - The endpoint is not connected
 667 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
 668 * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
 669 * for the registered address space of the peer of epd, or loffset or roffset
 670 * is negative
 671 */
 672int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
 673                  roffset, int rma_flags);
 674
 675/**
 676 * scif_writeto() - Copy to a remote address space
 677 * @epd:        endpoint descriptor
 678 * @loffset:    offset in local registered address space
 679 *              from which to copy
 680 * @len:        length of range to copy
 681 * @roffset:    offset in remote registered address space to
 682 *              which to copy
 683 * @rma_flags:  transfer mode flags
 684 *
 685 * scif_writeto() copies len bytes from the local registered address space of
 686 * epd, starting at the offset loffset, to the remote registered address space
 687 * of the peer of endpoint epd, starting at the offset roffset.
 688 *
 689 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
 690 * roffset + len - 1] must be within some registered window or windows of the
 691 * local and remote nodes. A range may intersect multiple registered windows,
 692 * but only if those windows are contiguous in the registered address space.
 693 *
 694 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 695 * programmed read/writes. Otherwise the data is copied using DMA. If
 696 * rma_flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
 697 * transfer is complete. Otherwise the transfer may be performed asynchronously.
 698 * The order in which any two asynchronous RMA operations complete
 699 * is non-deterministic. The synchronization functions, scif_fence_mark()/
 700 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 701 * the completion of asynchronous RMA operations on the same endpoint.
 702 *
 703 * The DMA transfer of individual bytes is not guaranteed to complete in
 704 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 705 * cacheline or partial cacheline of the source range will become visible on
 706 * the destination node after all other transferred data in the source
 707 * range has become visible on the destination node.
 708 *
 709 * The optimal DMA performance will likely be realized if both
 710 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
 711 * performance will likely be realized if loffset and roffset are not cacheline
 712 * aligned but are separated by some multiple of 64. The lowest level of
 713 * performance is likely if loffset and roffset are not separated by a multiple
 714 * of 64.
 715 *
 716 * The rma_flags argument is formed by ORing together zero or more of the
 717 * following values.
 718 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 719 *              engine.
 720 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 721 *              transfer has completed. Passing this flag results in the
 722 *              current implementation busy waiting and consuming CPU cycles
 723 *              while the DMA transfer is in progress for best performance by
 724 *              avoiding the interrupt latency.
 725 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 726 *              the source range becomes visible on the destination node
 727 *              after all other transferred data in the source range has
 728 *              become visible on the destination node
 729 *
 730 * Return:
 731 * Upon successful completion, scif_writeto() returns 0; otherwise in user
 732 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 733 * the negative of one of the following errors is returned.
 734 *
 735 * Errors:
 736 * EACCES - Attempt to write to a read-only range
 737 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 738 * ECONNRESET - Connection reset by peer
 739 * EINVAL - rma_flags is invalid
 740 * ENODEV - The remote node is lost, or existed but is not currently in the
 741 * network since it may have crashed
 742 * ENOTCONN - The endpoint is not connected
 743 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
 744 * address space of epd, or the range [roffset, roffset + len - 1] is invalid
 745 * for the registered address space of the peer of epd, or loffset or roffset
 746 * is negative
 747 */
 748int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
 749                 roffset, int rma_flags);
 750
 751/**
 752 * scif_vreadfrom() - Copy from a remote address space
 753 * @epd:        endpoint descriptor
 754 * @addr:       address to which to copy
 755 * @len:        length of range to copy
 756 * @roffset:    offset in remote registered address space
 757 *              from which to copy
 758 * @rma_flags:  transfer mode flags
 759 *
 760 * scif_vreadfrom() copies len bytes from the remote registered address
 761 * space of the peer of endpoint epd, starting at the offset roffset, to local
 762 * memory, starting at addr.
 763 *
 764 * The specified range [roffset, roffset + len - 1] must be within some
 765 * registered window or windows of the remote node. The range may
 766 * intersect multiple registered windows, but only if those windows are
 767 * contiguous in the registered address space.
 768 *
 769 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 770 * programmed read/writes. Otherwise the data is copied using DMA. If
 771 * rma_flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
 772 * transfer is complete. Otherwise the transfer may be performed asynchronously.
 773 * The order in which any two asynchronous RMA operations complete
 774 * is non-deterministic. The synchronization functions, scif_fence_mark()/
 775 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 776 * the completion of asynchronous RMA operations on the same endpoint.
 777 *
 778 * The DMA transfer of individual bytes is not guaranteed to complete in
 779 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 780 * cacheline or partial cacheline of the source range will become visible on
 781 * the destination node after all other transferred data in the source
 782 * range has become visible on the destination node.
 783 *
 784 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
 785 * the specified local memory range may remain in a pinned state even after
 786 * the specified transfer completes. This may reduce overhead if some or all of
 787 * the same virtual address range is referenced in a subsequent call of
 788 * scif_vreadfrom() or scif_vwriteto().
 789 *
 790 * The optimal DMA performance will likely be realized if both
 791 * addr and roffset are cacheline aligned (are a multiple of 64). Lower
 792 * performance will likely be realized if addr and roffset are not
 793 * cacheline aligned but are separated by some multiple of 64. The lowest level
 794 * of performance is likely if addr and roffset are not separated by a
 795 * multiple of 64.
 796 *
 797 * The rma_flags argument is formed by ORing together zero or more of the
 798 * following values.
 799 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 800 *              engine.
 801 * SCIF_RMA_USECACHE - enable registration caching
 802 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 803 *              transfer has completed. Passing this flag results in the
 804 *              current implementation busy waiting and consuming CPU cycles
 805 *              while the DMA transfer is in progress for best performance by
 806 *              avoiding the interrupt latency.
 807 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 808 *              the source range becomes visible on the destination node
 809 *              after all other transferred data in the source range has
 810 *              become visible on the destination node
 811 *
 812 * Return:
 813 * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
 814 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 815 * the negative of one of the following errors is returned.
 816 *
 817 * Errors:
 818 * EACCES - Attempt to write to a read-only range
 819 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 820 * ECONNRESET - Connection reset by peer
 821 * EINVAL - rma_flags is invalid
 822 * ENODEV - The remote node is lost, or existed but is not currently in the
 823 * network since it may have crashed
 824 * ENOTCONN - The endpoint is not connected
 825 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
 826 * registered address space of the peer of epd
 827 */
 828int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
 829                   int rma_flags);
 830
 831/**
 832 * scif_vwriteto() - Copy to a remote address space
 833 * @epd:        endpoint descriptor
 834 * @addr:       address from which to copy
 835 * @len:        length of range to copy
 836 * @roffset:    offset in remote registered address space to
 837 *              which to copy
 838 * @rma_flags:  transfer mode flags
 839 *
 840 * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
 841 * the remote registered address space of the peer of endpoint epd, starting at
 842 * the offset roffset.
 843 *
 844 * The specified range [roffset, roffset + len - 1] must be within some
 845 * registered window or windows of the remote node. The range may intersect
 846 * multiple registered windows, but only if those windows are contiguous in the
 847 * registered address space.
 848 *
 849 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
 850 * programmed read/writes. Otherwise the data is copied using DMA. If
 851 * rma_flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
 852 * transfer is complete. Otherwise the transfer may be performed asynchronously.
 853 * The order in which any two asynchronous RMA operations complete
 854 * is non-deterministic. The synchronization functions, scif_fence_mark()/
 855 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
 856 * the completion of asynchronous RMA operations on the same endpoint.
 857 *
 858 * The DMA transfer of individual bytes is not guaranteed to complete in
 859 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
 860 * cacheline or partial cacheline of the source range will become visible on
 861 * the destination node after all other transferred data in the source
 862 * range has become visible on the destination node.
 863 *
 864 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
 865 * the specified local memory range may remain in a pinned state even after
 866 * the specified transfer completes. This may reduce overhead if some or all of
 867 * the same virtual address range is referenced in a subsequent call of
 868 * scif_vreadfrom() or scif_vwriteto().
 869 *
 870 * The optimal DMA performance will likely be realized if both
 871 * addr and roffset are cacheline aligned (are a multiple of 64). Lower
 872 * performance will likely be realized if addr and roffset are not cacheline
 873 * aligned but are separated by some multiple of 64. The lowest level of
 874 * performance is likely if addr and roffset are not separated by a multiple of
 875 * 64.
 876 *
 877 * The rma_flags argument is formed by ORing together zero or more of the
 878 * following values.
 879 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
 880 *              engine.
 881 * SCIF_RMA_USECACHE - allow registration caching
 882 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
 883 *              transfer has completed. Passing this flag results in the
 884 *              current implementation busy waiting and consuming CPU cycles
 885 *              while the DMA transfer is in progress for best performance by
 886 *              avoiding the interrupt latency.
 887 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
 888 *              the source range becomes visible on the destination node
 889 *              after all other transferred data in the source range has
 890 *              become visible on the destination node
 891 *
 892 * Return:
 893 * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
 894 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 895 * the negative of one of the following errors is returned.
 896 *
 897 * Errors:
 898 * EACCES - Attempt to write to a read-only range
 899 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 900 * ECONNRESET - Connection reset by peer
 901 * EINVAL - rma_flags is invalid
 902 * ENODEV - The remote node is lost, or existed but is not currently in the
 903 * network since it may have crashed
 904 * ENOTCONN - The endpoint is not connected
 905 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
 906 * registered address space of the peer of epd
 907 */
 908int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
 909                  int rma_flags);
 910
 911/**
 912 * scif_fence_mark() - Mark previously issued RMAs
 913 * @epd:        endpoint descriptor
 914 * @flags:      control flags
 915 * @mark:       marked value returned as output.
 916 *
 917 * scif_fence_mark() returns after marking the current set of all uncompleted
 918 * RMAs initiated through the endpoint epd or the current set of all
 919 * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
 920 * marked with a value returned at mark. The application may subsequently call
 921 * scif_fence_wait(), passing the value returned at mark, to await completion
 922 * of all RMAs so marked.
 923 *
 924 * The flags argument has exactly one of the following values.
 925 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
 926 *      epd are marked
 927 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
 928 *      of endpoint epd are marked
 929 *
 930 * Return:
 931 * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
 932 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 933 * the negative of one of the following errors is returned.
 934 *
 935 * Errors:
 936 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 937 * ECONNRESET - Connection reset by peer
 938 * EINVAL - flags is invalid
 939 * ENODEV - The remote node is lost, or existed but is not currently in the
 940 * network since it may have crashed
 941 * ENOTCONN - The endpoint is not connected
 942 * ENOMEM - Insufficient kernel memory was available
 943 */
 944int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
 945
 946/**
 947 * scif_fence_wait() - Wait for completion of marked RMAs
 948 * @epd:        endpoint descriptor
 949 * @mark:       mark request
 950 *
 951 * scif_fence_wait() returns after all RMAs marked with mark have completed.
 952 * The value passed in mark must have been obtained in a previous call to
 953 * scif_fence_mark().
 954 *
 955 * Return:
 956 * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
 957 * mode -1 is returned and errno is set to indicate the error; in kernel mode
 958 * the negative of one of the following errors is returned.
 959 *
 960 * Errors:
 961 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
 962 * ECONNRESET - Connection reset by peer
 963 * ENODEV - The remote node is lost, or existed but is not currently in the
 964 * network since it may have crashed
 965 * ENOTCONN - The endpoint is not connected
 966 * ENOMEM - Insufficient kernel memory was available
 967 */
 968int scif_fence_wait(scif_epd_t epd, int mark);
 969
 970/**
 971 * scif_fence_signal() - Request a memory update on completion of RMAs
 972 * @epd:        endpoint descriptor
 973 * @loff:       local offset
 974 * @lval:       local value to write to loff
 975 * @roff:       remote offset
 976 * @rval:       remote value to write to roff
 977 * @flags:      flags
 978 *
 979 * scif_fence_signal() returns after marking the current set of all uncompleted
 980 * RMAs initiated through the endpoint epd or marking the current set of all
 981 * uncompleted RMAs initiated through the peer of endpoint epd.
 982 *
 983 * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
 984 * marked set, lval is written to memory at the address corresponding to offset
 985 * loff in the local registered address space of epd. loff must be within a
 986 * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
 987 * of the RMAs in the marked set, rval is written to memory at the address
 988 * corresponding to offset roff in the remote registered address space of epd.
 989 * roff must be within a remote registered window of the peer of epd. Note
 990 * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
 991 *
 992 * The flags argument is formed by OR'ing together the following.
 993 * Exactly one of the following values.
 994 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
 995 *      epd are marked
 996 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
 997 *      of endpoint epd are marked
 998 * One or more of the following values.
 999 * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
1000 *      memory at the address corresponding to offset loff in the local
1001 *      registered address space of epd.
1002 * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
1003 *      memory at the address corresponding to offset roff in the remote
1004 *      registered address space of epd.
1005 *
1006 * Return:
1007 * Upon successful completion, scif_fence_signal() returns 0; otherwise in
1008 * user mode -1 is returned and errno is set to indicate the error; in kernel
1009 * mode the negative of one of the following errors is returned.
1010 *
1011 * Errors:
1012 * EBADF, ENOTTY - epd is not a valid endpoint descriptor
1013 * ECONNRESET - Connection reset by peer
1014 * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
1015 * ENODEV - The remote node is lost, or existed but is not currently in the
1016 * network since it may have crashed
1017 * ENOTCONN - The endpoint is not connected
1018 * ENXIO - loff is invalid for the registered address space of epd, or roff is
1019 * invalid for the registered address space of the peer of epd
1020 */
1021int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
1022                      u64 rval, int flags);
1023
1024/**
1025 * scif_get_node_ids() - Return information about online nodes
1026 * @nodes:      array in which to return online node IDs
1027 * @len:        number of entries in the nodes array
1028 * @self:       address at which to place the node ID of the local node
1029 *
1030 * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
1031 * nodes in the SCIF network. If there is not enough space in nodes, as
1032 * indicated by the len parameter, only len node IDs are returned in nodes. The
1033 * return value of scif_get_node_ids() is the total number of nodes currently in
1034 * the SCIF network. By checking the return value against the len parameter,
1035 * the user may determine if enough space for nodes was allocated.
1036 *
1037 * The node ID of the local node is returned at self.
1038 *
1039 * Return:
1040 * Upon successful completion, scif_get_node_ids() returns the actual number of
1041 * online nodes in the SCIF network including the local node; otherwise in user
1042 * mode -1 is returned and errno is set to indicate the error; in kernel mode
1043 * no errors are returned.
1044 */
1045int scif_get_node_ids(u16 *nodes, int len, u16 *self);
1046
1047/**
1048 * scif_pin_pages() - Pin a set of pages
1049 * @addr:               Virtual address of range to pin
1050 * @len:                Length of range to pin
1051 * @prot_flags:         Page protection flags
1052 * @map_flags:          Page classification flags
1053 * @pinned_pages:       Handle to pinned pages
1054 *
1055 * scif_pin_pages() pins (locks in physical memory) the physical pages which
1056 * back the range of virtual address pages starting at addr and continuing for
1057 * len bytes. addr and len are constrained to be multiples of the page size. A
1058 * successful scif_pin_pages() call returns a handle at pinned_pages which may
1059 * be used in subsequent calls to scif_register_pinned_pages().
1060 *
1061 * The pages will remain pinned as long as there is a reference against the
1062 * scif_pinned_pages_t value returned by scif_pin_pages() and until
1063 * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
1064 * reference is added to a scif_pinned_pages_t value each time a window is
1065 * created by calling scif_register_pinned_pages() and passing the
1066 * scif_pinned_pages_t value. A reference is removed from a
1067 * scif_pinned_pages_t value each time such a window is deleted.
1068 *
1069 * Subsequent operations which change the memory pages to which virtual
1070 * addresses are mapped (such as mmap(), munmap()) have no effect on the
1071 * scif_pinned_pages_t value or windows created against it.
1072 *
1073 * If the process will fork(), it is recommended that the registered
1074 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
1075 * problems due to copy-on-write semantics.
1076 *
1077 * The prot_flags argument is formed by OR'ing together one or more of the
1078 * following values.
1079 * SCIF_PROT_READ - allow read operations against the pages
1080 * SCIF_PROT_WRITE - allow write operations against the pages
1081 * The map_flags argument can be set to SCIF_MAP_KERNEL to interpret addr as a
1082 * kernel space address. By default, addr is interpreted as a user space
1083 * address.
1084 *
1085 * Return:
1086 * Upon successful completion, scif_pin_pages() returns 0; otherwise the
1087 * negative of one of the following errors is returned.
1088 *
1089 * Errors:
1090 * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative
1091 * ENOMEM - Not enough space
1092 */
1093int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
1094                   scif_pinned_pages_t *pinned_pages);
1095
1096/**
1097 * scif_unpin_pages() - Unpin a set of pages
1098 * @pinned_pages:       Handle to pinned pages to be unpinned
1099 *
1100 * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
1101 * windows against pinned_pages. The physical pages represented by pinned_pages
1102 * will remain pinned until all windows previously registered against
1103 * pinned_pages are deleted (the window is scif_unregister()'d and all
1104 * references to the window are removed; see scif_unregister()).
1105 *
1106 * pinned_pages must have been obtained from a previous call to
1107 * scif_pin_pages(). After calling scif_unpin_pages(), it is an error to pass
1108 * pinned_pages to scif_register_pinned_pages().
1109 *
1110 * Return:
1111 * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
1112 * negative of one of the following errors is returned.
1113 *
1114 * Errors:
1115 * EINVAL - pinned_pages is not valid
1116 */
1117int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
1118
1119/**
1120 * scif_register_pinned_pages() - Mark a memory region for remote access.
1121 * @epd:                endpoint descriptor
1122 * @pinned_pages:       Handle to pinned pages
1123 * @offset:             Registered address space offset
1124 * @map_flags:          Flags which control where pages are mapped
1125 *
1126 * The scif_register_pinned_pages() function opens a window, a range of whole
1127 * pages of the registered address space of the endpoint epd, starting at
1128 * offset po. The value of po, further described below, is a function of the
1129 * parameters offset and pinned_pages, and the value of map_flags. Each page of
1130 * the window represents a corresponding physical memory page of the range
1131 * represented by pinned_pages; the length of the window is the same as the
1132 * length of the range represented by pinned_pages. A successful
1133 * scif_register_pinned_pages() call returns po as the return value.
1134 *
1135 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
1136 * exactly, and offset is constrained to be a multiple of the page size. The
1137 * mapping established by scif_register_pinned_pages() will not replace any
1138 * existing registration; an error is returned if any page of the new window
1139 * would intersect an existing window.
1140 *
1141 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
1142 * implementation-defined manner to arrive at po. The po so chosen will be an
1143 * area of the registered address space that the implementation deems suitable
1144 * for a mapping of the required size. An offset value of 0 is interpreted as
1145 * granting the implementation complete freedom in selecting po, subject to
1146 * constraints described below. A non-zero value of offset is taken to be a
1147 * suggestion of an offset near which the mapping should be placed. When the
1148 * implementation selects a value for po, it does not replace any extant
1149 * window. In all cases, po will be a multiple of the page size.
1150 *
1151 * The physical pages which are so represented by a window are available for
1152 * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
1153 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
1154 * physical pages represented by the window will not be reused by the memory
1155 * subsystem for any other purpose. Note that the same physical page may be
1156 * represented by multiple windows.
1157 *
1158 * Windows created by scif_register_pinned_pages() are unregistered by
1159 * scif_unregister().
1160 *
1161 * The map_flags argument can be set to SCIF_MAP_FIXED, which causes offset to
1162 * be interpreted as a fixed offset.
1163 *
1164 * Return:
1165 * Upon successful completion, scif_register_pinned_pages() returns the offset
1166 * at which the mapping was placed (po); otherwise the negative of one of the
1167 * following errors is returned.
1168 *
1169 * Errors:
1170 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
1171 * would intersect an existing window
1172 * EAGAIN - The mapping could not be performed due to lack of resources
1173 * ECONNRESET - Connection reset by peer
1174 * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
1175 * offset is not a multiple of the page size, or offset is negative
1176 * ENODEV - The remote node is lost, or existed but is not currently in the
1177 * network since it may have crashed
1178 * ENOMEM - Not enough space
1179 * ENOTCONN - The endpoint is not connected
1180 */
1181off_t scif_register_pinned_pages(scif_epd_t epd,
1182                                 scif_pinned_pages_t pinned_pages,
1183                                 off_t offset, int map_flags);
1184
1185/**
1186 * scif_get_pages() - Add references to remote registered pages
1187 * @epd:        endpoint descriptor
1188 * @offset:     remote registered offset
1189 * @len:        length of range of pages
1190 * @pages:      returned scif_range structure
1191 *
1192 * scif_get_pages() returns the addresses of the physical pages represented by
1193 * those pages of the registered address space of the peer of epd, starting at
1194 * offset and continuing for len bytes. offset and len are constrained to be
1195 * multiples of the page size.
1196 *
1197 * All of the pages in the specified range [offset, offset + len - 1] must be
1198 * within a single window of the registered address space of the peer of epd.
1199 *
1200 * The addresses are returned as a virtually contiguous array pointed to by the
1201 * phys_addr component of the scif_range structure whose address is returned in
1202 * pages. The nr_pages component of scif_range is the length of the array. The
1203 * prot_flags component of scif_range holds the protection flag value passed
1204 * when the pages were registered.
1205 *
1206 * Each physical page whose address is returned by scif_get_pages() remains
1207 * available and will not be released for reuse until the scif_range structure
1208 * is returned in a call to scif_put_pages(). The scif_range structure returned
1209 * by scif_get_pages() must be unmodified.
1210 *
1211 * It is an error to call scif_close() on an endpoint on which a scif_range
1212 * structure of that endpoint has not been returned to scif_put_pages().
1213 *
1214 * Return:
1215 * Upon successful completion, scif_get_pages() returns 0; otherwise the
1216 * negative of one of the following errors is returned.
1217 * Errors:
1218 * ECONNRESET - Connection reset by peer.
1219 * EINVAL - offset is not a multiple of the page size, or offset is negative, or
1220 * len is not a multiple of the page size
1221 * ENODEV - The remote node is lost, or existed but is not currently in the
1222 * network since it may have crashed
1223 * ENOTCONN - The endpoint is not connected
1224 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
1225 * for the registered address space of the peer of epd
1226 */
1227int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
1228                   struct scif_range **pages);
1229
1230/**
1231 * scif_put_pages() - Remove references from remote registered pages
1232 * @pages:      pages to be returned
1233 *
1234 * scif_put_pages() releases a scif_range structure previously obtained by
1235 * calling scif_get_pages(). The physical pages represented by pages may
1236 * be reused when the window which represented those pages is unregistered.
1237 * Therefore, those pages must not be accessed after calling scif_put_pages().
1238 *
1239 * Return:
1240 * Upon successful completion, scif_put_pages() returns 0; otherwise the
1241 * negative of one of the following errors is returned.
1242 * Errors:
1243 * EINVAL - pages does not point to a valid scif_range structure, or
1244 * the scif_range structure pointed to by pages was already returned
1245 * ENODEV - The remote node is lost, or existed but is not currently in the
1246 * network since it may have crashed
1247 * ENOTCONN - The endpoint is not connected
1248 */
1249int scif_put_pages(struct scif_range *pages);
1250
1251/**
1252 * scif_poll() - Wait for some event on an endpoint
1253 * @epds:       Array of scif_pollepd entries, one per endpoint of interest
1254 * @nepds:      Number of entries in the epds array
1255 * @timeout:    Maximum time to block, in milliseconds; negative means infinite
1256 *
1257 * scif_poll() waits for one of a set of endpoints to become ready to perform
1258 * an I/O operation.
1259 *
1260 * The epds argument specifies the endpoint descriptors to be examined and the
1261 * events of interest for each endpoint descriptor. epds is a pointer to an
1262 * array with one member for each open endpoint descriptor of interest.
1263 *
1264 * The number of items in the epds array is specified in nepds. The epd field
1265 * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
1266 * events is a bitmask specifying the events which the application is
1267 * interested in. The field revents is an output parameter, filled by the
1268 * kernel with the events that actually occurred. The bits returned in revents
1269 * can include any of those specified in events, or one of the values EPOLLERR,
1270 * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events
1271 * field, and will be set in the revents field whenever the corresponding
1272 * condition is true.)
1273 *
1274 * If none of the requested events (and no error) has occurred on any endpoint
1275 * descriptor, scif_poll() blocks until an event occurs or the timeout expires.
1276 *
1277 * The timeout argument specifies an upper limit on the time for which
1278 * scif_poll() will block, in milliseconds. Specifying a negative value in
1279 * timeout means an infinite timeout.
1280 *
1281 * The following bits may be set in events and returned in revents.
1282 * EPOLLIN - Data may be received without blocking. For a connected
1283 * endpoint, this means that scif_recv() may be called without blocking. For a
1284 * listening endpoint, this means that scif_accept() may be called without
1285 * blocking.
1286 * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this
1287 * means that scif_send() may be called without blocking. EPOLLOUT may also be
1288 * used to block waiting for a non-blocking connect to complete. This bit value
1289 * has no meaning for a listening endpoint and is ignored if specified.
1290 *
1291 * The following bits are only returned in revents, and are ignored if set in
1292 * events.
1293 * EPOLLERR - An error occurred on the endpoint
1294 * EPOLLHUP - The connection to the peer endpoint was disconnected
1295 * EPOLLNVAL - The specified endpoint descriptor is invalid.
1296 *
1297 * Return:
1298 * Upon successful completion, scif_poll() returns a non-negative value. A
1299 * positive value indicates the total number of endpoint descriptors that have
1300 * been selected (that is, endpoint descriptors for which the revents member is
1301 * non-zero). A value of 0 indicates that the call timed out and no endpoint
1302 * descriptors have been selected. Otherwise in user mode -1 is returned and
1303 * errno is set to indicate the error; in kernel mode the negative of one of
1304 * the following errors is returned.
1305 *
1306 * Errors:
1307 * EINTR - A signal occurred before any requested event
1308 * EINVAL - The nepds argument is greater than {OPEN_MAX}
1309 * ENOMEM - There was no space to allocate file descriptor tables
1310 */
1311int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
1312
1313/**
1314 * scif_client_register() - Register a SCIF client
1315 * @client:     client to register; supplies probe() and remove() methods
1316 *
1317 * scif_client_register() registers a SCIF client. The probe() method
1318 * of the client is called when SCIF peer devices come online and the
1319 * remove() method is called when the peer devices disappear.
1320 *
1321 * Return:
1322 * Upon successful completion, scif_client_register() returns a non-negative
1323 * value. Otherwise the return value is the same as subsys_interface_register()
1324 * in the kernel.
1325 */
1326int scif_client_register(struct scif_client *client);
1327
1328/**
1329 * scif_client_unregister() - Unregister a SCIF client
1330 * @client:     SCIF client to be unregistered
1331 *
1332 * scif_client_unregister() unregisters the SCIF client passed in @client.
1333 *
1334 * Return:
1335 * None (the function is declared void)
1336 */
1337void scif_client_unregister(struct scif_client *client);
1338
1339#endif /* __SCIF_H__ */
1340