// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * Intel SCIF driver.
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
        "Unbound",
        "Bound",
        "Listening",
        "Connected",
        "Connecting",
        "Mapping",
        "Closing",
        "Close Listening",
        "Disconnected",
        "Zombie"};

enum conn_async_state {
        ASYNC_CONN_IDLE = 1,    /* ep setup for async connect */
        ASYNC_CONN_INPROGRESS,  /* async connect in progress */
        ASYNC_CONN_FLUSH_WORK   /* async work flush in progress  */
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 */
const struct file_operations scif_anon_fops = {
        .owner = THIS_MODULE,
};

scif_epd_t scif_open(void)
{
        struct scif_endpt *ep;
        int err;

        might_sleep();
        ep = kzalloc(sizeof(*ep), GFP_KERNEL);
        if (!ep)
                goto err_ep_alloc;

        ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
        if (!ep->qp_info.qp)
                goto err_qp_alloc;

        err = scif_anon_inode_getfile(ep);
        if (err)
                goto err_anon_inode;

        spin_lock_init(&ep->lock);
        mutex_init(&ep->sendlock);
        mutex_init(&ep->recvlock);

        scif_rma_ep_init(ep);
        ep->state = SCIFEP_UNBOUND;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI open: ep %p success\n", ep);
        return ep;

err_anon_inode:
        kfree(ep->qp_info.qp);
err_qp_alloc:
        kfree(ep);
err_ep_alloc:
        return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);
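
/*
 * Example (illustrative only, not part of the driver): a minimal sketch of
 * the endpoint lifecycle for a kernel-mode SCIF client. Error handling is
 * abbreviated; passing 0 to scif_bind() asks SCIF to pick any available
 * non-admin port.
 */
#if 0   /* usage sketch, not compiled */
static int example_open_bind(void)
{
        scif_epd_t epd;
        int ret;

        epd = scif_open();
        if (!epd)
                return -ENOMEM;
        ret = scif_bind(epd, 0);        /* returns the bound port number */
        if (ret < 0) {
                scif_close(epd);
                return ret;
        }
        pr_info("bound to SCIF port %d\n", ret);
        scif_close(epd);
        return 0;
}
#endif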

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
        struct scifmsg msg;
        struct scif_endpt *fep = NULL;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        int err;

        /*
         * Wake up any threads blocked in send()/recv() before closing
         * out the connection. Grabbing and releasing the send/recv lock
         * will ensure that any blocked senders/receivers have exited for
         * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
         * close. Ring 3 endpoints are not affected since close will not
         * be called while there are IOCTLs executing.
         */
        wake_up_interruptible(&ep->sendwq);
        wake_up_interruptible(&ep->recvwq);
        mutex_lock(&ep->sendlock);
        mutex_unlock(&ep->sendlock);
        mutex_lock(&ep->recvlock);
        mutex_unlock(&ep->recvlock);

        /* Remove from the connected list */
        mutex_lock(&scif_info.connlock);
        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                tmpep = list_entry(pos, struct scif_endpt, list);
                if (tmpep == ep) {
                        list_del(pos);
                        fep = tmpep;
                        spin_lock(&ep->lock);
                        break;
                }
        }

        if (!fep) {
                /*
                 * The other side completed the disconnect before the
                 * endpoint could be removed from the list. Therefore the
                 * ep lock was never taken; traverse the disconnected list
                 * to find the endpoint and then release the conn lock.
                 */
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                return NULL;
        }

        init_completion(&ep->discon);
        msg.uop = SCIF_DISCNCT;
        msg.src = ep->port;
        msg.dst = ep->peer;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->remote_ep;

        err = scif_nodeqp_send(ep->remote_dev, &msg);
        spin_unlock(&ep->lock);
        mutex_unlock(&scif_info.connlock);

        if (!err)
                /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
                wait_for_completion_timeout(&ep->discon,
                                            SCIF_NODE_ALIVE_TIMEOUT);
        return ep;
}

int scif_close(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        enum scif_epd_state oldstate;
        bool flush_conn;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
                ep, scif_ep_states[ep->state]);
        might_sleep();
        spin_lock(&ep->lock);
        flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
        spin_unlock(&ep->lock);

        if (flush_conn)
                flush_work(&scif_info.conn_work);

        spin_lock(&ep->lock);
        oldstate = ep->state;

        ep->state = SCIFEP_CLOSING;

        switch (oldstate) {
        case SCIFEP_ZOMBIE:
                dev_err(scif_info.mdev.this_device,
                        "SCIFAPI close: zombie state unexpected\n");
                fallthrough;
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                /* Remove from the disconnected list */
                mutex_lock(&scif_info.connlock);
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                break;
        case SCIFEP_UNBOUND:
        case SCIFEP_BOUND:
        case SCIFEP_CONNECTING:
                spin_unlock(&ep->lock);
                break;
        case SCIFEP_MAPPING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CLOSING:
        {
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                scif_disconnect_ep(ep);
                break;
        }
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
        {
                struct scif_conreq *conreq;
                struct scifmsg msg;
                struct scif_endpt *aep;

                spin_unlock(&ep->lock);
                mutex_lock(&scif_info.eplock);

                /* remove from listen list */
                list_for_each_safe(pos, tmpq, &scif_info.listen) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep)
                                list_del(pos);
                }
                /* Remove any dangling accepts */
                while (ep->acceptcnt) {
                        aep = list_first_entry(&ep->li_accept,
                                               struct scif_endpt, liacceptlist);
                        list_del(&aep->liacceptlist);
                        scif_put_port(aep->port.port);
                        list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
                                tmpep = list_entry(pos, struct scif_endpt,
                                                   miacceptlist);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.eplock);
                        mutex_lock(&scif_info.connlock);
                        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.connlock);
                        scif_teardown_ep(aep);
                        mutex_lock(&scif_info.eplock);
                        scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
                        ep->acceptcnt--;
                }

                spin_lock(&ep->lock);
                mutex_unlock(&scif_info.eplock);

                /* Remove and reject any pending connection requests. */
                while (ep->conreqcnt) {
                        conreq = list_first_entry(&ep->conlist,
                                                  struct scif_conreq, list);
                        list_del(&conreq->list);

                        msg.uop = SCIF_CNCT_REJ;
                        msg.dst.node = conreq->msg.src.node;
                        msg.dst.port = conreq->msg.src.port;
                        msg.payload[0] = conreq->msg.payload[0];
                        msg.payload[1] = conreq->msg.payload[1];
                        /*
                         * No error handling on purpose for scif_nodeqp_send().
                         * If the remote node is lost we still want to free the
                         * connection requests on the self node.
                         */
                        scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
                                         &msg);
                        ep->conreqcnt--;
                        kfree(conreq);
                }

                spin_unlock(&ep->lock);
                /* If a kSCIF accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        }
        scif_put_port(ep->port.port);
        scif_anon_inode_fput(ep);
        scif_teardown_ep(ep);
        scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * __scif_flush() - Wakes up any blocking accepts. The endpoint will no
 *                  longer accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        switch (ep->state) {
        case SCIFEP_LISTENING:
        {
                ep->state = SCIFEP_CLLISTEN;

                /* If an accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        default:
                break;
        }
        return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret = 0;
        int tmp;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: ep %p %s requested port number %d\n",
                ep, scif_ep_states[ep->state], pn);
        if (pn) {
                /*
                 * Similar to IETF RFC 1700, SCIF ports below
                 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
                 * processes or by processes executed by privileged users.
                 */
                if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
                        ret = -EACCES;
                        goto scif_bind_admin_exit;
                }
        }

        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_BOUND) {
                ret = -EINVAL;
                goto scif_bind_exit;
        } else if (ep->state != SCIFEP_UNBOUND) {
                ret = -EISCONN;
                goto scif_bind_exit;
        }

        if (pn) {
                tmp = scif_rsrv_port(pn);
                if (tmp != pn) {
                        ret = -EINVAL;
                        goto scif_bind_exit;
                }
        } else {
                ret = scif_get_new_port();
                if (ret < 0)
                        goto scif_bind_exit;
                pn = ret;
        }

        ep->state = SCIFEP_BOUND;
        ep->port.node = scif_info.nodeid;
        ep->port.port = pn;
        ep->conn_async_state = ASYNC_CONN_IDLE;
        ret = pn;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
        spin_unlock(&ep->lock);
scif_bind_admin_exit:
        return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);

int scif_listen(scif_epd_t epd, int backlog)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
        case SCIFEP_CLLISTEN:
        case SCIFEP_UNBOUND:
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                return -EINVAL;
        case SCIFEP_LISTENING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                spin_unlock(&ep->lock);
                return -EISCONN;
        case SCIFEP_BOUND:
                break;
        }

        ep->state = SCIFEP_LISTENING;
        ep->backlog = backlog;

        ep->conreqcnt = 0;
        ep->acceptcnt = 0;
        INIT_LIST_HEAD(&ep->conlist);
        init_waitqueue_head(&ep->conwq);
        INIT_LIST_HEAD(&ep->li_accept);
        spin_unlock(&ep->lock);

        /*
         * The endpoint is now listening, so delete the QP information,
         * which is not needed on a listening endpoint, before placing it
         * on the list of listening endpoints.
         */
        scif_teardown_ep(ep);
        ep->qp_info.qp = NULL;

        mutex_lock(&scif_info.eplock);
        list_add_tail(&ep->list, &scif_info.listen);
        mutex_unlock(&scif_info.eplock);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
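
/*
 * Example (illustrative only, not part of the driver): a minimal sketch of
 * the listening side. The well-known port of 2000 and the backlog of 16 are
 * arbitrary assumptions; scif_accept() with SCIF_ACCEPT_SYNC blocks until a
 * connection request arrives.
 */
#if 0   /* usage sketch, not compiled */
static int example_listen_accept(void)
{
        struct scif_port_id peer;
        scif_epd_t lep, nep;
        int err;

        lep = scif_open();
        if (!lep)
                return -ENOMEM;
        err = scif_bind(lep, 2000);
        if (err < 0)
                goto out;
        err = scif_listen(lep, 16);
        if (err)
                goto out;
        err = scif_accept(lep, &peer, &nep, SCIF_ACCEPT_SYNC);
        if (!err) {
                pr_info("accepted connection from node %u port %u\n",
                        peer.node, peer.port);
                scif_close(nep);        /* a real client would use nep */
        }
out:
        scif_close(lep);
        return err;
}
#endif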

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *      connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *      scif_connect(..) which calls scif_setup_qp_connect(..) which
 *      allocates the local qp for the endpoint ring buffer and then sends
 *      a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *      a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *      wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *      message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *      endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *      and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *      the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *      connecting endpoint is woken up as part of handling
 *      scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *      QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *      success or a SCIF_CNCT_GNTNACK message on failure and completes
 *      the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *      in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
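
/*
 * Example (illustrative only, not part of the driver): the connecting side
 * of the flow above, as seen by a kernel-mode client. Node id 1 and port
 * 2000 are arbitrary assumptions; scif_connect() blocks until the
 * connection is granted, rejected, or times out.
 */
#if 0   /* usage sketch, not compiled */
static int example_connect(void)
{
        struct scif_port_id dst = { .node = 1, .port = 2000 };
        scif_epd_t epd;
        int err;

        epd = scif_open();
        if (!epd)
                return -ENOMEM;
        err = scif_bind(epd, 0);        /* any available local port */
        if (err < 0)
                goto out;
        err = scif_connect(epd, &dst);  /* negative errno on failure */
out:
        scif_close(epd);
        return err;
}
#endif
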
static int scif_conn_func(struct scif_endpt *ep)
{
        int err = 0;
        struct scifmsg msg;
        struct device *spdev;

        err = scif_reserve_dma_chan(ep);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, err);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }
        /* Initiate the first part of the endpoint QP setup */
        err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
                                    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s err %d qp_offset 0x%llx\n",
                        __func__, err, ep->qp_info.qp_offset);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }

        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        /* Format connect message and send it */
        msg.src = ep->port;
        msg.dst = ep->conn_port;
        msg.uop = SCIF_CNCT_REQ;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->qp_info.qp_offset;
        err = _scif_nodeqp_send(ep->remote_dev, &msg);
        if (err)
                goto connect_error_dec;
        scif_put_peer_dev(spdev);
        /*
         * Wait for the remote node to respond with SCIF_CNCT_GNT or
         * SCIF_CNCT_REJ message.
         */
        err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d timeout\n", __func__, __LINE__);
                ep->state = SCIFEP_BOUND;
        }
        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        if (ep->state == SCIFEP_MAPPING) {
                err = scif_setup_qp_connect_response(ep->remote_dev,
                                                     ep->qp_info.qp,
                                                     ep->qp_info.gnt_pld);
                /*
                 * If the resources to map the queue are not available then
                 * we need to tell the other side to terminate the accept.
                 */
                if (err) {
                        dev_err(&ep->remote_dev->sdev->dev,
                                "%s %d err %d\n", __func__, __LINE__, err);
                        msg.uop = SCIF_CNCT_GNTNACK;
                        msg.payload[0] = ep->remote_ep;
                        _scif_nodeqp_send(ep->remote_dev, &msg);
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }

                msg.uop = SCIF_CNCT_GNTACK;
                msg.payload[0] = ep->remote_ep;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                if (err) {
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }
                ep->state = SCIFEP_CONNECTED;
                mutex_lock(&scif_info.connlock);
                list_add_tail(&ep->list, &scif_info.connected);
                mutex_unlock(&scif_info.connlock);
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connected\n", ep);
        } else if (ep->state == SCIFEP_BOUND) {
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connection refused\n", ep);
                err = -ECONNREFUSED;
                goto connect_error_dec;
        }
        scif_put_peer_dev(spdev);
        return err;
connect_error_dec:
        scif_put_peer_dev(spdev);
cleanup_qp:
        scif_cleanup_ep_qp(ep);
connect_error_simple:
        return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 *
 */
void scif_conn_handler(struct work_struct *work)
{
        struct scif_endpt *ep;

        do {
                ep = NULL;
                spin_lock(&scif_info.nb_connect_lock);
                if (!list_empty(&scif_info.nb_connect_list)) {
                        ep = list_first_entry(&scif_info.nb_connect_list,
                                              struct scif_endpt, conn_list);
                        list_del(&ep->conn_list);
                }
                spin_unlock(&scif_info.nb_connect_lock);
                if (ep) {
                        ep->conn_err = scif_conn_func(ep);
                        wake_up_interruptible(&ep->conn_pend_wq);
                }
        } while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        struct scif_dev *remote_dev;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
                scif_ep_states[ep->state]);

        if (!scif_dev || dst->node > scif_info.maxid)
                return -ENODEV;

        might_sleep();

        remote_dev = &scif_dev[dst->node];
        spdev = scif_get_peer_dev(remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                return err;
        }

        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
                err = -EINVAL;
                break;
        case SCIFEP_DISCONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EINVAL;
                break;
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
                err = -EOPNOTSUPP;
                break;
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        err = -EINPROGRESS;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_CONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_UNBOUND:
                err = scif_get_new_port();
                if (err < 0)
                        break;
                ep->port.port = err;
                ep->port.node = scif_info.nodeid;
                ep->conn_async_state = ASYNC_CONN_IDLE;
                fallthrough;
        case SCIFEP_BOUND:
                /*
                 * If a non-blocking connect has already been initiated
                 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
                 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
                 * SCIFEP_BOUND due to an error in the connection process
                 * (e.g., connection refused). If conn_async_state is
                 * ASYNC_CONN_INPROGRESS, transition to ASYNC_CONN_FLUSH_WORK
                 * so that the error status can be collected. If the state is
                 * already ASYNC_CONN_FLUSH_WORK, then set the error to
                 * EINPROGRESS since some other thread is waiting to collect
                 * the error status.
                 */
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                        err = -EINPROGRESS;
                } else {
                        ep->conn_port = *dst;
                        init_waitqueue_head(&ep->sendwq);
                        init_waitqueue_head(&ep->recvwq);
                        init_waitqueue_head(&ep->conwq);
                        ep->conn_async_state = 0;

                        if (unlikely(non_block))
                                ep->conn_async_state = ASYNC_CONN_INPROGRESS;
                }
                break;
        }

        if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
                goto connect_simple_unlock1;

        ep->state = SCIFEP_CONNECTING;
        ep->remote_dev = &scif_dev[dst->node];
        ep->qp_info.qp->magic = SCIFEP_MAGIC;
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                init_waitqueue_head(&ep->conn_pend_wq);
                spin_lock(&scif_info.nb_connect_lock);
                list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
                spin_unlock(&scif_info.nb_connect_lock);
                err = -EINPROGRESS;
                schedule_work(&scif_info.conn_work);
        }
connect_simple_unlock1:
        spin_unlock(&ep->lock);
        scif_put_peer_dev(spdev);
        if (err) {
                return err;
        } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                flush_work(&scif_info.conn_work);
                err = ep->conn_err;
                spin_lock(&ep->lock);
                ep->conn_async_state = ASYNC_CONN_IDLE;
                spin_unlock(&ep->lock);
        } else {
                err = scif_conn_func(ep);
        }
        return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
        return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);

/*
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node. Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero is returned and the peer information
 * is filled in.
 *
 * If the end point is not in the listening state -EINVAL is returned.
 *
 * If resource allocation fails during the connection sequence, -ENOMEM
 * is returned.
 *
 * If the function is called with the ASYNC flag set and no connection
 * requests are pending it returns -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so, -EINTR is returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
                scif_epd_t *newepd, int flags)
{
        struct scif_endpt *lep = (struct scif_endpt *)epd;
        struct scif_endpt *cep;
        struct scif_conreq *conreq;
        struct scifmsg msg;
        int err;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

        if (flags & ~SCIF_ACCEPT_SYNC)
                return -EINVAL;

        if (!peer || !newepd)
                return -EINVAL;

        might_sleep();
        spin_lock(&lep->lock);
        if (lep->state != SCIFEP_LISTENING) {
                spin_unlock(&lep->lock);
                return -EINVAL;
        }

        if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
                /* No connection request present and we do not want to wait */
                spin_unlock(&lep->lock);
                return -EAGAIN;
        }

        lep->files = current->files;
retry_connection:
        spin_unlock(&lep->lock);
        /* Wait for the remote node to send us a SCIF_CNCT_REQ */
        err = wait_event_interruptible(lep->conwq,
                                       (lep->conreqcnt ||
                                       (lep->state != SCIFEP_LISTENING)));
        if (err)
                return err;

        if (lep->state != SCIFEP_LISTENING)
                return -EINTR;

        spin_lock(&lep->lock);

        if (!lep->conreqcnt)
                goto retry_connection;

        /* Get the first connect request off the list */
        conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
        list_del(&conreq->list);
        lep->conreqcnt--;
        spin_unlock(&lep->lock);

        /* Fill in the peer information */
        peer->node = conreq->msg.src.node;
        peer->port = conreq->msg.src.port;

        cep = kzalloc(sizeof(*cep), GFP_KERNEL);
        if (!cep) {
                err = -ENOMEM;
                goto scif_accept_error_epalloc;
        }
        spin_lock_init(&cep->lock);
        mutex_init(&cep->sendlock);
        mutex_init(&cep->recvlock);
        cep->state = SCIFEP_CONNECTING;
        cep->remote_dev = &scif_dev[peer->node];
        cep->remote_ep = conreq->msg.payload[0];

        scif_rma_ep_init(cep);

        err = scif_reserve_dma_chan(cep);
        if (err) {
                dev_err(scif_info.mdev.this_device,
                        "%s %d err %d\n", __func__, __LINE__, err);
                goto scif_accept_error_qpalloc;
        }

        cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
        if (!cep->qp_info.qp) {
                err = -ENOMEM;
                goto scif_accept_error_qpalloc;
        }

        err = scif_anon_inode_getfile(cep);
        if (err)
                goto scif_accept_error_anon_inode;

        cep->qp_info.qp->magic = SCIFEP_MAGIC;
        spdev = scif_get_peer_dev(cep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto scif_accept_error_map;
        }
        err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
                                   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
                                   cep->remote_dev);
        if (err) {
                dev_dbg(&cep->remote_dev->sdev->dev,
                        "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
                        lep, cep, err, cep->qp_info.qp_offset);
                scif_put_peer_dev(spdev);
                goto scif_accept_error_map;
        }

        cep->port.node = lep->port.node;
        cep->port.port = lep->port.port;
        cep->peer.node = peer->node;
        cep->peer.port = peer->port;
        init_waitqueue_head(&cep->sendwq);
        init_waitqueue_head(&cep->recvwq);
        init_waitqueue_head(&cep->conwq);

        msg.uop = SCIF_CNCT_GNT;
        msg.src = cep->port;
        msg.payload[0] = cep->remote_ep;
        msg.payload[1] = cep->qp_info.qp_offset;
        msg.payload[2] = (u64)cep;

        err = _scif_nodeqp_send(cep->remote_dev, &msg);
        scif_put_peer_dev(spdev);
        if (err)
                goto scif_accept_error_map;
retry:
        /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
        err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ACCEPT_TIMEOUT);
        if (!err && scifdev_alive(cep))
                goto retry;
        err = !err ? -ENODEV : 0;
        if (err)
                goto scif_accept_error_map;
        kfree(conreq);

        spin_lock(&cep->lock);

        if (cep->state == SCIFEP_CLOSING) {
                /*
                 * Remote failed to allocate resources and NAKed the grant.
                 * There is at this point nothing referencing the new end point.
                 */
                spin_unlock(&cep->lock);
                scif_teardown_ep(cep);
                kfree(cep);

                /* If call with sync flag then go back and wait. */
                if (flags & SCIF_ACCEPT_SYNC) {
                        spin_lock(&lep->lock);
                        goto retry_connection;
                }
                return -EAGAIN;
        }

        scif_get_port(cep->port.port);
        *newepd = (scif_epd_t)cep;
        spin_unlock(&cep->lock);
        return 0;
scif_accept_error_map:
        scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
        scif_teardown_ep(cep);
scif_accept_error_qpalloc:
        kfree(cep);
scif_accept_error_epalloc:
        msg.uop = SCIF_CNCT_REJ;
        msg.dst.node = conreq->msg.src.node;
        msg.dst.port = conreq->msg.src.port;
        msg.payload[0] = conreq->msg.payload[0];
        msg.payload[1] = conreq->msg.payload[1];
        scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
        kfree(conreq);
        return err;
}
EXPORT_SYMBOL_GPL(scif_accept);

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length to send or receive
 * @flags: blocking or non blocking
 *
 * Validate parameters for the messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
        int ret = -EINVAL;

        if (len < 0)
                goto err_ret;
        if (flags && (!(flags & SCIF_RECV_BLOCK)))
                goto err_ret;
        ret = 0;
err_ret:
        return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_xfer_len = 0, sent_len = 0, write_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_SEND_BLOCK)
                might_sleep();

        spin_lock(&ep->lock);
        while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
                write_count = scif_rb_space(&qp->outbound_q);
                if (write_count) {
                        /* Best effort to send as much data as possible */
                        curr_xfer_len = min(len - sent_len, write_count);
                        ret = scif_rb_write(&qp->outbound_q, msg,
                                            curr_xfer_len);
                        if (ret < 0)
                                break;
                        /* Success. Update write pointer */
                        scif_rb_commit(&qp->outbound_q);
                        /*
                         * Send a notification to the peer about the
                         * produced data message.
                         */
                        notif_msg.src = ep->port;
                        notif_msg.uop = SCIF_CLIENT_SENT;
                        notif_msg.payload[0] = ep->remote_ep;
                        ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
                        if (ret)
                                break;
                        sent_len += curr_xfer_len;
                        msg = msg + curr_xfer_len;
                        continue;
                }
                curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
                /* Not enough RB space. return for the Non Blocking case */
                if (!(flags & SCIF_SEND_BLOCK))
                        break;

                spin_unlock(&ep->lock);
                /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
                ret =
                wait_event_interruptible(ep->sendwq,
                                         (SCIFEP_CONNECTED != ep->state) ||
                                         (scif_rb_space(&qp->outbound_q) >=
                                         curr_xfer_len));
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (sent_len)
                ret = sent_len;
        else if (!ret && SCIFEP_CONNECTED != ep->state)
                ret = SCIFEP_DISCONNECTED == ep->state ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_recv_len = 0, remaining_len = len, read_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_RECV_BLOCK)
                might_sleep();
        spin_lock(&ep->lock);
        while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
                                 SCIFEP_DISCONNECTED == ep->state)) {
                read_count = scif_rb_count(&qp->inbound_q, remaining_len);
                if (read_count) {
                        /*
                         * Best effort to recv as much data as there
                         * are bytes to read in the RB particularly
                         * important for the Non Blocking case.
                         */
                        curr_recv_len = min(remaining_len, read_count);
                        scif_rb_get_next(&qp->inbound_q, msg, curr_recv_len);
                        if (ep->state == SCIFEP_CONNECTED) {
                                /*
                                 * Update the read pointer only if the endpoint
                                 * is still connected else the read pointer
                                 * might no longer exist since the peer has
                                 * freed resources!
                                 */
                                scif_rb_update_read_ptr(&qp->inbound_q);
                                /*
                                 * Send a notification to the peer about the
                                 * consumed data message only if the EP is in
                                 * SCIFEP_CONNECTED state.
                                 */
                                notif_msg.src = ep->port;
                                notif_msg.uop = SCIF_CLIENT_RCVD;
                                notif_msg.payload[0] = ep->remote_ep;
                                ret = _scif_nodeqp_send(ep->remote_dev,
                                                        &notif_msg);
                                if (ret)
                                        break;
                        }
                        remaining_len -= curr_recv_len;
                        msg = msg + curr_recv_len;
                        continue;
                }
                /*
                 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
                 * we will keep looping forever.
                 */
                if (ep->state == SCIFEP_DISCONNECTED)
                        break;
                /*
                 * Return in the Non Blocking case if there is no data
                 * to read in this iteration.
                 */
                if (!(flags & SCIF_RECV_BLOCK))
                        break;
                curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
                spin_unlock(&ep->lock);
                /*
                 * Wait for a SCIF_CLIENT_SENT message in the blocking case
                 * or until the other side disconnects.
                 */
                ret =
                wait_event_interruptible(ep->recvwq,
                                         SCIFEP_CONNECTED != ep->state ||
                                         scif_rb_count(&qp->inbound_q,
                                                       curr_recv_len)
                                         >= curr_recv_len);
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (len - remaining_len)
                ret = len - remaining_len;
        else if (!ret && ep->state != SCIFEP_CONNECTED)
                ret = ep->state == SCIFEP_DISCONNECTED ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int sent_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto send_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto send_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer into
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->sendlock);
        while (sent_len != len) {
                loop_len = len - sent_len;
                loop_len = min(chunk_len, loop_len);
                if (copy_from_user(tmp, msg, loop_len)) {
                        err = -EFAULT;
                        goto send_free_err;
                }
                err = _scif_send(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto send_free_err;
                sent_len += err;
                msg += err;
                if (err != loop_len)
                        goto send_free_err;
        }
send_free_err:
        mutex_unlock(&ep->sendlock);
        kfree(tmp);
send_err:
        return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int recv_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto recv_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto recv_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer into
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->recvlock);
        while (recv_len != len) {
                loop_len = len - recv_len;
                loop_len = min(chunk_len, loop_len);
                err = _scif_recv(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto recv_free_err;
                if (copy_to_user(msg, tmp, err)) {
                        err = -EFAULT;
                        goto recv_free_err;
                }
                recv_len += err;
                msg += err;
                if (err != loop_len)
                        goto recv_free_err;
        }
recv_free_err:
        mutex_unlock(&ep->recvlock);
        kfree(tmp);
recv_err:
        return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of the data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        if (!ep->remote_dev)
                return -ENOTCONN;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_send().
         */
        if (flags & SCIF_SEND_BLOCK)
                mutex_lock(&ep->sendlock);

        ret = _scif_send(epd, msg, len, flags);

        if (flags & SCIF_SEND_BLOCK)
                mutex_unlock(&ep->sendlock);
        return ret;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_recv().
         */
        if (flags & SCIF_RECV_BLOCK)
                mutex_lock(&ep->recvlock);

        ret = _scif_recv(epd, msg, len, flags);

        if (flags & SCIF_RECV_BLOCK)
                mutex_unlock(&ep->recvlock);

        return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
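
/*
 * Example (illustrative only, not part of the driver): a minimal kernel-mode
 * echo over a connected endpoint. With SCIF_RECV_BLOCK/SCIF_SEND_BLOCK the
 * calls below do not return until the full buffer has been transferred or
 * the endpoint is torn down.
 */
#if 0   /* usage sketch, not compiled */
static int example_echo(scif_epd_t epd)
{
        char buf[64];
        int ret;

        /* Block until all of buf has been received */
        ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
        if (ret < 0)
                return ret;
        /* Echo the received bytes back, again blocking until fully sent */
        ret = scif_send(epd, buf, ret, SCIF_SEND_BLOCK);
        return ret < 0 ? ret : 0;
}
#endif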

static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
                                   poll_table *p, struct scif_endpt *ep)
{
        /*
         * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
         * and regrab it afterwards. Because the endpoint state might have
         * changed while the lock was given up, the state must be checked
         * again after re-acquiring the lock. The code in __scif_pollfd(..)
         * does this.
         */
        spin_unlock(&ep->lock);
        poll_wait(f, wq, p);
        spin_lock(&ep->lock);
}

__poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
        __poll_t mask = 0;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

        spin_lock(&ep->lock);

        /* Endpoint is waiting for a non-blocking connect to complete */
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        if (ep->state == SCIFEP_CONNECTED ||
                            ep->state == SCIFEP_DISCONNECTED ||
                            ep->conn_err)
                                mask |= EPOLLOUT;
                        goto exit;
                }
        }

        /* Endpoint is listening for incoming connection requests */
        if (ep->state == SCIFEP_LISTENING) {
                _scif_poll_wait(f, &ep->conwq, wait, ep);
                if (ep->state == SCIFEP_LISTENING) {
                        if (ep->conreqcnt)
                                mask |= EPOLLIN;
                        goto exit;
                }
        }

        /* Endpoint is connected or disconnected */
        if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
                if (poll_requested_events(wait) & EPOLLIN)
                        _scif_poll_wait(f, &ep->recvwq, wait, ep);
                if (poll_requested_events(wait) & EPOLLOUT)
                        _scif_poll_wait(f, &ep->sendwq, wait, ep);
                if (ep->state == SCIFEP_CONNECTED ||
                    ep->state == SCIFEP_DISCONNECTED) {
                        /* Data can be read without blocking */
                        if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
                                mask |= EPOLLIN;
                        /* Data can be written without blocking */
                        if (scif_rb_space(&ep->qp_info.qp->outbound_q))
                                mask |= EPOLLOUT;
                        /* Return EPOLLHUP if endpoint is disconnected */
                        if (ep->state == SCIFEP_DISCONNECTED)
                                mask |= EPOLLHUP;
                        goto exit;
                }
        }

        /* Return EPOLLERR if the endpoint is in none of the above states */
        mask |= EPOLLERR;
exit:
        spin_unlock(&ep->lock);
        return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *        and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
        struct poll_wqueues table;
        poll_table *pt;
        int i, count = 0, timed_out = timeout_msecs == 0;
        __poll_t mask;
        u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
                : msecs_to_jiffies(timeout_msecs);

        poll_initwait(&table);
        pt = &table.pt;
        while (1) {
                for (i = 0; i < nfds; i++) {
                        pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
                        mask = __scif_pollfd(ufds[i].epd->anon,
                                             pt, ufds[i].epd);
                        mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
                        if (mask) {
                                count++;
                                pt->_qproc = NULL;
                        }
                        ufds[i].revents = mask;
                }
                pt->_qproc = NULL;
                if (!count) {
                        count = table.error;
                        if (signal_pending(current))
                                count = -EINTR;
                }
                if (count || timed_out)
                        break;

                if (!schedule_timeout_interruptible(timeout))
                        timed_out = 1;
        }
        poll_freewait(&table);
        return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
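
/*
 * Example (illustrative only, not part of the driver): waiting for incoming
 * data on a connected endpoint with kernel mode SCIF poll. The one second
 * timeout is an arbitrary assumption.
 */
#if 0   /* usage sketch, not compiled */
static int example_poll_for_data(scif_epd_t epd)
{
        struct scif_pollepd pollepd = {
                .epd = epd,
                .events = EPOLLIN,
        };
        int ret;

        ret = scif_poll(&pollepd, 1, 1000);
        if (ret > 0 && (pollepd.revents & EPOLLIN))
                pr_info("data can be read without blocking\n");
        return ret;
}
#endif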

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
        int online = 0;
        int offset = 0;
        int node;

        if (!scif_is_mgmt_node())
                scif_get_node_info();

        *self = scif_info.nodeid;
        mutex_lock(&scif_info.conflock);
        len = min_t(int, len, scif_info.total);
        for (node = 0; node <= scif_info.maxid; node++) {
                if (_scifdev_alive(&scif_dev[node])) {
                        online++;
                        if (offset < len)
                                nodes[offset++] = node;
                }
        }
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
                scif_info.total, online, offset);
        mutex_unlock(&scif_info.conflock);

        return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
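
/*
 * Example (illustrative only, not part of the driver): enumerating the
 * online SCIF nodes. The array size of 32 is an arbitrary assumption; the
 * return value is the number of online nodes, which may exceed the number
 * of ids actually filled in.
 */
#if 0   /* usage sketch, not compiled */
static void example_enumerate_nodes(void)
{
        u16 nodes[32];
        u16 self;
        int online, i;

        online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
        for (i = 0; i < min_t(int, online, ARRAY_SIZE(nodes)); i++)
                pr_info("node %u online (self is node %u)\n",
                        nodes[i], self);
}
#endif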

static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->probe)
                client->probe(spdev);
        return 0;
}

static void scif_remove_client_dev(struct device *dev,
                                   struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->remove)
                client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
        subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
        struct subsys_interface *si = &client->si;

        si->name = client->name;
        si->subsys = &scif_peer_bus;
        si->add_dev = scif_add_client_dev;
        si->remove_dev = scif_remove_client_dev;

        return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);
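
/*
 * Example (illustrative only, not part of the driver): a minimal SCIF
 * client. The probe/remove callbacks run when a peer SCIF device is added
 * to or removed from the SCIF peer bus; registration would typically be
 * done from module init/exit. The spdev->dnode peer node id used below is
 * an assumption about struct scif_peer_dev.
 */
#if 0   /* usage sketch, not compiled */
static void example_probe(struct scif_peer_dev *spdev)
{
        dev_info(&spdev->dev, "peer node %d added\n", spdev->dnode);
}

static void example_remove(struct scif_peer_dev *spdev)
{
        dev_info(&spdev->dev, "peer node %d removed\n", spdev->dnode);
}

static struct scif_client example_client = {
        .name = "example",
        .probe = example_probe,
        .remove = example_remove,
};
#endif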