linux/drivers/misc/mic/scif/scif_api.c
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
        "Unbound",
        "Bound",
        "Listening",
        "Connected",
        "Connecting",
        "Mapping",
        "Closing",
        "Close Listening",
        "Disconnected",
        "Zombie"};

enum conn_async_state {
        ASYNC_CONN_IDLE = 1,    /* ep setup for async connect */
        ASYNC_CONN_INPROGRESS,  /* async connect in progress */
        ASYNC_CONN_FLUSH_WORK   /* async work flush in progress  */
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 */
const struct file_operations scif_anon_fops = {
        .owner = THIS_MODULE,
};

scif_epd_t scif_open(void)
{
        struct scif_endpt *ep;
        int err;

        might_sleep();
        ep = kzalloc(sizeof(*ep), GFP_KERNEL);
        if (!ep)
                goto err_ep_alloc;

        ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
        if (!ep->qp_info.qp)
                goto err_qp_alloc;

        err = scif_anon_inode_getfile(ep);
        if (err)
                goto err_anon_inode;

        spin_lock_init(&ep->lock);
        mutex_init(&ep->sendlock);
        mutex_init(&ep->recvlock);

        scif_rma_ep_init(ep);
        ep->state = SCIFEP_UNBOUND;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI open: ep %p success\n", ep);
        return ep;

err_anon_inode:
        kfree(ep->qp_info.qp);
err_qp_alloc:
        kfree(ep);
err_ep_alloc:
        return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);
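
/*
 * Illustrative sketch (not part of the driver): minimal kernel-mode
 * open/close usage of the endpoint API exported above. The error value
 * chosen on allocation failure is an assumption of this example.
 */
#if 0
static int scif_open_close_example(void)
{
        scif_epd_t epd;

        /* scif_open() returns NULL on allocation failure */
        epd = scif_open();
        if (!epd)
                return -ENOMEM;

        /* ... bind/connect/send would go here ... */

        /* scif_close() releases the endpoint and its queue pair */
        return scif_close(epd);
}
#endif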

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
        struct scifmsg msg;
        struct scif_endpt *fep = NULL;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        int err;

        /*
         * Wake up any threads blocked in send()/recv() before closing
         * out the connection. Grabbing and releasing the send/recv lock
         * will ensure that any blocked senders/receivers have exited for
         * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
         * close. Ring 3 endpoints are not affected since close will not
         * be called while there are IOCTLs executing.
         */
        wake_up_interruptible(&ep->sendwq);
        wake_up_interruptible(&ep->recvwq);
        mutex_lock(&ep->sendlock);
        mutex_unlock(&ep->sendlock);
        mutex_lock(&ep->recvlock);
        mutex_unlock(&ep->recvlock);

        /* Remove from the connected list */
        mutex_lock(&scif_info.connlock);
        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                tmpep = list_entry(pos, struct scif_endpt, list);
                if (tmpep == ep) {
                        list_del(pos);
                        fep = tmpep;
                        spin_lock(&ep->lock);
                        break;
                }
        }

        if (!fep) {
                /*
                 * The other side has completed the disconnect before
                 * the end point can be removed from the list. Therefore
                 * the ep lock is not locked, traverse the disconnected
                 * list to find the endpoint and release the conn lock.
                 */
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                return NULL;
        }

        init_completion(&ep->discon);
        msg.uop = SCIF_DISCNCT;
        msg.src = ep->port;
        msg.dst = ep->peer;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->remote_ep;

        err = scif_nodeqp_send(ep->remote_dev, &msg);
        spin_unlock(&ep->lock);
        mutex_unlock(&scif_info.connlock);

        if (!err)
                /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
                wait_for_completion_timeout(&ep->discon,
                                            SCIF_NODE_ALIVE_TIMEOUT);
        return ep;
}

int scif_close(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        enum scif_epd_state oldstate;
        bool flush_conn;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
                ep, scif_ep_states[ep->state]);
        might_sleep();
        spin_lock(&ep->lock);
        flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
        spin_unlock(&ep->lock);

        if (flush_conn)
                flush_work(&scif_info.conn_work);

        spin_lock(&ep->lock);
        oldstate = ep->state;

        ep->state = SCIFEP_CLOSING;

        switch (oldstate) {
        case SCIFEP_ZOMBIE:
                dev_err(scif_info.mdev.this_device,
                        "SCIFAPI close: zombie state unexpected\n");
                /* fall through */
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                /* Remove from the disconnected list */
                mutex_lock(&scif_info.connlock);
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                break;
        case SCIFEP_UNBOUND:
        case SCIFEP_BOUND:
        case SCIFEP_CONNECTING:
                spin_unlock(&ep->lock);
                break;
        case SCIFEP_MAPPING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CLOSING:
        {
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                scif_disconnect_ep(ep);
                break;
        }
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
        {
                struct scif_conreq *conreq;
                struct scifmsg msg;
                struct scif_endpt *aep;

                spin_unlock(&ep->lock);
                mutex_lock(&scif_info.eplock);

                /* remove from listen list */
                list_for_each_safe(pos, tmpq, &scif_info.listen) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep)
                                list_del(pos);
                }
                /* Remove any dangling accepts */
                while (ep->acceptcnt) {
                        aep = list_first_entry(&ep->li_accept,
                                               struct scif_endpt, liacceptlist);
                        list_del(&aep->liacceptlist);
                        scif_put_port(aep->port.port);
                        list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
                                tmpep = list_entry(pos, struct scif_endpt,
                                                   miacceptlist);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.eplock);
                        mutex_lock(&scif_info.connlock);
                        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.connlock);
                        scif_teardown_ep(aep);
                        mutex_lock(&scif_info.eplock);
                        scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
                        ep->acceptcnt--;
                }

                spin_lock(&ep->lock);
                mutex_unlock(&scif_info.eplock);

                /* Remove and reject any pending connection requests. */
                while (ep->conreqcnt) {
                        conreq = list_first_entry(&ep->conlist,
                                                  struct scif_conreq, list);
                        list_del(&conreq->list);

                        msg.uop = SCIF_CNCT_REJ;
                        msg.dst.node = conreq->msg.src.node;
                        msg.dst.port = conreq->msg.src.port;
                        msg.payload[0] = conreq->msg.payload[0];
                        msg.payload[1] = conreq->msg.payload[1];
                        /*
                         * No error handling on purpose for scif_nodeqp_send().
                         * If the remote node is lost we still want to free the
                         * connection requests on the self node.
                         */
                        scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
                                         &msg);
                        ep->conreqcnt--;
                        kfree(conreq);
                }

                spin_unlock(&ep->lock);
                /* If a kSCIF accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        }
        scif_put_port(ep->port.port);
        scif_anon_inode_fput(ep);
        scif_teardown_ep(ep);
        scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * __scif_flush() - Wakes up any blocking accepts. The endpoint will no
 *                  longer accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        switch (ep->state) {
        case SCIFEP_LISTENING:
        {
                ep->state = SCIFEP_CLLISTEN;

                /* If an accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        default:
                break;
        }
        return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret = 0;
        int tmp;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: ep %p %s requested port number %d\n",
                ep, scif_ep_states[ep->state], pn);
        if (pn) {
                /*
                 * Similar to IETF RFC 1700, SCIF ports below
                 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
                 * processes or by processes executed by privileged users.
                 */
                if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
                        ret = -EACCES;
                        goto scif_bind_admin_exit;
                }
        }

        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_BOUND) {
                ret = -EINVAL;
                goto scif_bind_exit;
        } else if (ep->state != SCIFEP_UNBOUND) {
                ret = -EISCONN;
                goto scif_bind_exit;
        }

        if (pn) {
                tmp = scif_rsrv_port(pn);
                if (tmp != pn) {
                        ret = -EINVAL;
                        goto scif_bind_exit;
                }
        } else {
                pn = scif_get_new_port();
                if (!pn) {
                        ret = -ENOSPC;
                        goto scif_bind_exit;
                }
        }

        ep->state = SCIFEP_BOUND;
        ep->port.node = scif_info.nodeid;
        ep->port.port = pn;
        ep->conn_async_state = ASYNC_CONN_IDLE;
        ret = pn;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
        spin_unlock(&ep->lock);
scif_bind_admin_exit:
        return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);
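
/*
 * Illustrative sketch (not part of the driver): binding an endpoint to an
 * explicit port vs. letting SCIF pick one. The port number used here is a
 * hypothetical value chosen for the example.
 */
#if 0
static int scif_bind_example(scif_epd_t epd)
{
        int port;

        /*
         * Request an explicit port; ports below SCIF_ADMIN_PORT_END
         * require CAP_SYS_ADMIN. Passing 0 instead would let SCIF
         * allocate a free port, which is then returned.
         */
        port = scif_bind(epd, 2000);    /* 2000 is a hypothetical port */
        if (port < 0)
                return port;

        pr_info("bound to port %d\n", port);
        return 0;
}
#endif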

int scif_listen(scif_epd_t epd, int backlog)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
        case SCIFEP_CLLISTEN:
        case SCIFEP_UNBOUND:
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                return -EINVAL;
        case SCIFEP_LISTENING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                spin_unlock(&ep->lock);
                return -EISCONN;
        case SCIFEP_BOUND:
                break;
        }

        ep->state = SCIFEP_LISTENING;
        ep->backlog = backlog;

        ep->conreqcnt = 0;
        ep->acceptcnt = 0;
        INIT_LIST_HEAD(&ep->conlist);
        init_waitqueue_head(&ep->conwq);
        INIT_LIST_HEAD(&ep->li_accept);
        spin_unlock(&ep->lock);

        /*
         * The listen state is now set, so tear down the qp information that
         * is not needed while listening, before placing the endpoint on the
         * list of listening endpoints.
         */
        scif_teardown_ep(ep);
        ep->qp_info.qp = NULL;

        mutex_lock(&scif_info.eplock);
        list_add_tail(&ep->list, &scif_info.listen);
        mutex_unlock(&scif_info.eplock);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
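
/*
 * Illustrative sketch (not part of the driver): a bound endpoint moving to
 * the listening state. The backlog value is a hypothetical choice.
 */
#if 0
static int scif_listen_example(void)
{
        scif_epd_t epd;
        int ret;

        epd = scif_open();
        if (!epd)
                return -ENOMEM;

        ret = scif_bind(epd, 0);        /* 0: let SCIF pick a free port */
        if (ret < 0)
                goto err;

        ret = scif_listen(epd, 16);     /* queue up to 16 pending connects */
        if (ret)
                goto err;
        return 0;
err:
        scif_close(epd);
        return ret;
}
#endif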

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *      connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *      scif_connect(..) which calls scif_setup_qp_connect(..) which
 *      allocates the local qp for the endpoint ring buffer and then sends
 *      a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *      a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *      wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *      message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *      endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *      and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *      the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *      connecting endpoint is woken up as part of handling
 *      scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *      QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *      success or a SCIF_CNCT_GNTNACK message on failure and completes
 *      the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *      in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
        int err = 0;
        struct scifmsg msg;
        struct device *spdev;

        err = scif_reserve_dma_chan(ep);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, err);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }
        /* Initiate the first part of the endpoint QP setup */
        err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
                                    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s err %d qp_offset 0x%llx\n",
                        __func__, err, ep->qp_info.qp_offset);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }

        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        /* Format connect message and send it */
        msg.src = ep->port;
        msg.dst = ep->conn_port;
        msg.uop = SCIF_CNCT_REQ;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->qp_info.qp_offset;
        err = _scif_nodeqp_send(ep->remote_dev, &msg);
        if (err)
                goto connect_error_dec;
        scif_put_peer_dev(spdev);
        /*
         * Wait for the remote node to respond with SCIF_CNCT_GNT or
         * SCIF_CNCT_REJ message.
         */
        err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d timeout\n", __func__, __LINE__);
                ep->state = SCIFEP_BOUND;
        }
        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        if (ep->state == SCIFEP_MAPPING) {
                err = scif_setup_qp_connect_response(ep->remote_dev,
                                                     ep->qp_info.qp,
                                                     ep->qp_info.gnt_pld);
                /*
                 * If the resources to map the queue are not available then
                 * we need to tell the other side to terminate the accept.
                 */
                if (err) {
                        dev_err(&ep->remote_dev->sdev->dev,
                                "%s %d err %d\n", __func__, __LINE__, err);
                        msg.uop = SCIF_CNCT_GNTNACK;
                        msg.payload[0] = ep->remote_ep;
                        _scif_nodeqp_send(ep->remote_dev, &msg);
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }

                msg.uop = SCIF_CNCT_GNTACK;
                msg.payload[0] = ep->remote_ep;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                if (err) {
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }
                ep->state = SCIFEP_CONNECTED;
                mutex_lock(&scif_info.connlock);
                list_add_tail(&ep->list, &scif_info.connected);
                mutex_unlock(&scif_info.connlock);
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connected\n", ep);
        } else if (ep->state == SCIFEP_BOUND) {
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connection refused\n", ep);
                err = -ECONNREFUSED;
                goto connect_error_dec;
        }
        scif_put_peer_dev(spdev);
        return err;
connect_error_dec:
        scif_put_peer_dev(spdev);
cleanup_qp:
        scif_cleanup_ep_qp(ep);
connect_error_simple:
        return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 *
 */
void scif_conn_handler(struct work_struct *work)
{
        struct scif_endpt *ep;

        do {
                ep = NULL;
                spin_lock(&scif_info.nb_connect_lock);
                if (!list_empty(&scif_info.nb_connect_list)) {
                        ep = list_first_entry(&scif_info.nb_connect_list,
                                              struct scif_endpt, conn_list);
                        list_del(&ep->conn_list);
                }
                spin_unlock(&scif_info.nb_connect_lock);
                if (ep) {
                        ep->conn_err = scif_conn_func(ep);
                        wake_up_interruptible(&ep->conn_pend_wq);
                }
        } while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        struct scif_dev *remote_dev;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
                scif_ep_states[ep->state]);

        if (!scif_dev || dst->node > scif_info.maxid)
                return -ENODEV;

        might_sleep();

        remote_dev = &scif_dev[dst->node];
        spdev = scif_get_peer_dev(remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                return err;
        }

        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
                err = -EINVAL;
                break;
        case SCIFEP_DISCONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EINVAL;
                break;
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
                err = -EOPNOTSUPP;
                break;
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        err = -EINPROGRESS;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_CONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_UNBOUND:
                ep->port.port = scif_get_new_port();
                if (!ep->port.port) {
                        err = -ENOSPC;
                } else {
                        ep->port.node = scif_info.nodeid;
                        ep->conn_async_state = ASYNC_CONN_IDLE;
                }
                /* Fall through */
        case SCIFEP_BOUND:
                /*
                 * If a non-blocking connect has already been initiated
                 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
                 * ASYNC_CONN_FLUSH_WORK), the endpoint could end up in
                 * SCIFEP_BOUND due to an error in the connection process
                 * (e.g., connection refused). If conn_async_state is
                 * ASYNC_CONN_INPROGRESS, transition to ASYNC_CONN_FLUSH_WORK
                 * so that the error status can be collected. If the state is
                 * already ASYNC_CONN_FLUSH_WORK, then set the error to
                 * EINPROGRESS since some other thread is waiting to collect
                 * the error status.
                 */
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                        err = -EINPROGRESS;
                } else {
                        ep->conn_port = *dst;
                        init_waitqueue_head(&ep->sendwq);
                        init_waitqueue_head(&ep->recvwq);
                        init_waitqueue_head(&ep->conwq);
                        ep->conn_async_state = 0;

                        if (unlikely(non_block))
                                ep->conn_async_state = ASYNC_CONN_INPROGRESS;
                }
                break;
        }

        if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
                goto connect_simple_unlock1;

        ep->state = SCIFEP_CONNECTING;
        ep->remote_dev = &scif_dev[dst->node];
        ep->qp_info.qp->magic = SCIFEP_MAGIC;
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                init_waitqueue_head(&ep->conn_pend_wq);
                spin_lock(&scif_info.nb_connect_lock);
                list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
                spin_unlock(&scif_info.nb_connect_lock);
                err = -EINPROGRESS;
                schedule_work(&scif_info.conn_work);
        }
connect_simple_unlock1:
        spin_unlock(&ep->lock);
        scif_put_peer_dev(spdev);
        if (err) {
                return err;
        } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                flush_work(&scif_info.conn_work);
                err = ep->conn_err;
                spin_lock(&ep->lock);
                ep->conn_async_state = ASYNC_CONN_IDLE;
                spin_unlock(&ep->lock);
        } else {
                err = scif_conn_func(ep);
        }
        return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
        return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);
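
/*
 * Illustrative sketch (not part of the driver): a blocking kernel-mode
 * connect to a peer node. The node/port values are hypothetical, and the
 * caller is assumed to know the port the peer is listening on.
 */
#if 0
static int scif_connect_example(scif_epd_t epd)
{
        struct scif_port_id dst = {
                .node = 1,      /* hypothetical peer node id */
                .port = 2000,   /* hypothetical listening port on the peer */
        };

        /* Blocks until SCIF_CNCT_GNT(N)ACK/SCIF_CNCT_REJ processing is done */
        return scif_connect(epd, &dst);
}
#endif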

/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node. Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion, zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state, -EINVAL will be returned.
 *
 * If during the connection sequence resource allocation fails, -ENOMEM
 * will be returned.
 *
 * If the function is called with the ASYNC flag set and no connection requests
 * are pending, it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so, -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
                scif_epd_t *newepd, int flags)
{
        struct scif_endpt *lep = (struct scif_endpt *)epd;
        struct scif_endpt *cep;
        struct scif_conreq *conreq;
        struct scifmsg msg;
        int err;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

        if (flags & ~SCIF_ACCEPT_SYNC)
                return -EINVAL;

        if (!peer || !newepd)
                return -EINVAL;

        might_sleep();
        spin_lock(&lep->lock);
        if (lep->state != SCIFEP_LISTENING) {
                spin_unlock(&lep->lock);
                return -EINVAL;
        }

        if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
                /* No connection request present and we do not want to wait */
                spin_unlock(&lep->lock);
                return -EAGAIN;
        }

        lep->files = current->files;
retry_connection:
        spin_unlock(&lep->lock);
        /* Wait for the remote node to send us a SCIF_CNCT_REQ */
        err = wait_event_interruptible(lep->conwq,
                                       (lep->conreqcnt ||
                                       (lep->state != SCIFEP_LISTENING)));
        if (err)
                return err;

        if (lep->state != SCIFEP_LISTENING)
                return -EINTR;

        spin_lock(&lep->lock);

        if (!lep->conreqcnt)
                goto retry_connection;

        /* Get the first connect request off the list */
        conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
        list_del(&conreq->list);
        lep->conreqcnt--;
        spin_unlock(&lep->lock);

        /* Fill in the peer information */
        peer->node = conreq->msg.src.node;
        peer->port = conreq->msg.src.port;

        cep = kzalloc(sizeof(*cep), GFP_KERNEL);
        if (!cep) {
                err = -ENOMEM;
                goto scif_accept_error_epalloc;
        }
        spin_lock_init(&cep->lock);
        mutex_init(&cep->sendlock);
        mutex_init(&cep->recvlock);
        cep->state = SCIFEP_CONNECTING;
        cep->remote_dev = &scif_dev[peer->node];
        cep->remote_ep = conreq->msg.payload[0];

        scif_rma_ep_init(cep);

        err = scif_reserve_dma_chan(cep);
        if (err) {
                dev_err(scif_info.mdev.this_device,
                        "%s %d err %d\n", __func__, __LINE__, err);
                goto scif_accept_error_qpalloc;
        }

        cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
        if (!cep->qp_info.qp) {
                err = -ENOMEM;
                goto scif_accept_error_qpalloc;
        }

        err = scif_anon_inode_getfile(cep);
        if (err)
                goto scif_accept_error_anon_inode;

        cep->qp_info.qp->magic = SCIFEP_MAGIC;
        spdev = scif_get_peer_dev(cep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto scif_accept_error_map;
        }
        err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
                                   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
                                   cep->remote_dev);
        if (err) {
                dev_dbg(&cep->remote_dev->sdev->dev,
                        "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
                        lep, cep, err, cep->qp_info.qp_offset);
                scif_put_peer_dev(spdev);
                goto scif_accept_error_map;
        }

        cep->port.node = lep->port.node;
        cep->port.port = lep->port.port;
        cep->peer.node = peer->node;
        cep->peer.port = peer->port;
        init_waitqueue_head(&cep->sendwq);
        init_waitqueue_head(&cep->recvwq);
        init_waitqueue_head(&cep->conwq);

        msg.uop = SCIF_CNCT_GNT;
        msg.src = cep->port;
        msg.payload[0] = cep->remote_ep;
        msg.payload[1] = cep->qp_info.qp_offset;
        msg.payload[2] = (u64)cep;

        err = _scif_nodeqp_send(cep->remote_dev, &msg);
        scif_put_peer_dev(spdev);
        if (err)
                goto scif_accept_error_map;
retry:
        /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
        err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ACCEPT_TIMEOUT);
        if (!err && scifdev_alive(cep))
                goto retry;
        err = !err ? -ENODEV : 0;
        if (err)
                goto scif_accept_error_map;
        kfree(conreq);

        spin_lock(&cep->lock);

        if (cep->state == SCIFEP_CLOSING) {
                /*
                 * The remote failed to allocate resources and NAKed the
                 * grant. At this point nothing references the new endpoint.
                 */
                spin_unlock(&cep->lock);
                scif_teardown_ep(cep);
                kfree(cep);

                /* If called with the sync flag, go back and wait. */
                if (flags & SCIF_ACCEPT_SYNC) {
                        spin_lock(&lep->lock);
                        goto retry_connection;
                }
                return -EAGAIN;
        }

        scif_get_port(cep->port.port);
        *newepd = (scif_epd_t)cep;
        spin_unlock(&cep->lock);
        return 0;
scif_accept_error_map:
        scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
        scif_teardown_ep(cep);
scif_accept_error_qpalloc:
        kfree(cep);
scif_accept_error_epalloc:
        msg.uop = SCIF_CNCT_REJ;
        msg.dst.node = conreq->msg.src.node;
        msg.dst.port = conreq->msg.src.port;
        msg.payload[0] = conreq->msg.payload[0];
        msg.payload[1] = conreq->msg.payload[1];
        scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
        kfree(conreq);
        return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
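
/*
 * Illustrative sketch (not part of the driver): a simple synchronous accept
 * loop on a listening endpoint. The hand-off of the new endpoint to a
 * worker is left hypothetical.
 */
#if 0
static int scif_accept_example(scif_epd_t lepd)
{
        struct scif_port_id peer;
        scif_epd_t newepd;
        int err;

        for (;;) {
                /* SCIF_ACCEPT_SYNC blocks until a connection is established */
                err = scif_accept(lepd, &peer, &newepd, SCIF_ACCEPT_SYNC);
                if (err)
                        return err;     /* e.g. -EINTR, -ENOMEM, -ENODEV */

                pr_info("accepted connection from node %u port %u\n",
                        peer.node, peer.port);
                /* hand newepd off to a worker; scif_close(newepd) when done */
        }
}
#endif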

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length to send or receive
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
        int ret = -EINVAL;

        if (len < 0)
                goto err_ret;
        if (flags && (!(flags & SCIF_RECV_BLOCK)))
                goto err_ret;
        ret = 0;
err_ret:
        return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_xfer_len = 0, sent_len = 0, write_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_SEND_BLOCK)
                might_sleep();

        spin_lock(&ep->lock);
        while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
                write_count = scif_rb_space(&qp->outbound_q);
                if (write_count) {
                        /* Best effort to send as much data as possible */
                        curr_xfer_len = min(len - sent_len, write_count);
                        ret = scif_rb_write(&qp->outbound_q, msg,
                                            curr_xfer_len);
                        if (ret < 0)
                                break;
                        /* Success. Update write pointer */
                        scif_rb_commit(&qp->outbound_q);
                        /*
                         * Send a notification to the peer about the
                         * produced data message.
                         */
                        notif_msg.src = ep->port;
                        notif_msg.uop = SCIF_CLIENT_SENT;
                        notif_msg.payload[0] = ep->remote_ep;
                        ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
                        if (ret)
                                break;
                        sent_len += curr_xfer_len;
                        msg = msg + curr_xfer_len;
                        continue;
                }
                curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
                /* Not enough RB space. Return for the non-blocking case */
                if (!(flags & SCIF_SEND_BLOCK))
                        break;

                spin_unlock(&ep->lock);
                /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
                ret =
                wait_event_interruptible(ep->sendwq,
                                         (SCIFEP_CONNECTED != ep->state) ||
                                         (scif_rb_space(&qp->outbound_q) >=
                                         curr_xfer_len));
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (sent_len)
                ret = sent_len;
        else if (!ret && SCIFEP_CONNECTED != ep->state)
                ret = SCIFEP_DISCONNECTED == ep->state ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        int read_size;
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_recv_len = 0, remaining_len = len, read_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_RECV_BLOCK)
                might_sleep();
        spin_lock(&ep->lock);
        while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
                                 SCIFEP_DISCONNECTED == ep->state)) {
                read_count = scif_rb_count(&qp->inbound_q, remaining_len);
                if (read_count) {
                        /*
                         * Best effort to recv as much data as there are
                         * bytes to read in the RB, which is particularly
                         * important for the non-blocking case.
                         */
                        curr_recv_len = min(remaining_len, read_count);
                        read_size = scif_rb_get_next(&qp->inbound_q,
                                                     msg, curr_recv_len);
                        if (ep->state == SCIFEP_CONNECTED) {
                                /*
                                 * Update the read pointer only if the endpoint
                                 * is still connected else the read pointer
                                 * might no longer exist since the peer has
                                 * freed resources!
                                 */
                                scif_rb_update_read_ptr(&qp->inbound_q);
                                /*
                                 * Send a notification to the peer about the
                                 * consumed data message only if the EP is in
                                 * SCIFEP_CONNECTED state.
                                 */
                                notif_msg.src = ep->port;
                                notif_msg.uop = SCIF_CLIENT_RCVD;
                                notif_msg.payload[0] = ep->remote_ep;
                                ret = _scif_nodeqp_send(ep->remote_dev,
                                                        &notif_msg);
                                if (ret)
                                        break;
                        }
                        remaining_len -= curr_recv_len;
                        msg = msg + curr_recv_len;
                        continue;
                }
                /*
                 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
                 * we will keep looping forever.
                 */
                if (ep->state == SCIFEP_DISCONNECTED)
                        break;
                /*
                 * Return in the Non Blocking case if there is no data
                 * to read in this iteration.
                 */
                if (!(flags & SCIF_RECV_BLOCK))
                        break;
                curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
                spin_unlock(&ep->lock);
                /*
                 * Wait for a SCIF_CLIENT_SEND message in the blocking case
                 * or until other side disconnects.
                 */
                ret =
                wait_event_interruptible(ep->recvwq,
                                         SCIFEP_CONNECTED != ep->state ||
                                         scif_rb_count(&qp->inbound_q,
                                                       curr_recv_len)
                                         >= curr_recv_len);
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (len - remaining_len)
                ret = len - remaining_len;
        else if (!ret && ep->state != SCIFEP_CONNECTED)
                ret = ep->state == SCIFEP_DISCONNECTED ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int sent_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto send_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto send_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer in
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->sendlock);
        while (sent_len != len) {
                loop_len = len - sent_len;
                loop_len = min(chunk_len, loop_len);
                if (copy_from_user(tmp, msg, loop_len)) {
                        err = -EFAULT;
                        goto send_free_err;
                }
                err = _scif_send(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto send_free_err;
                sent_len += err;
                msg += err;
                if (err != loop_len)
                        goto send_free_err;
        }
send_free_err:
        mutex_unlock(&ep->sendlock);
        kfree(tmp);
send_err:
        return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int recv_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto recv_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto recv_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer in
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->recvlock);
        while (recv_len != len) {
                loop_len = len - recv_len;
                loop_len = min(chunk_len, loop_len);
                err = _scif_recv(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto recv_free_err;
                if (copy_to_user(msg, tmp, err)) {
                        err = -EFAULT;
                        goto recv_free_err;
                }
                recv_len += err;
                msg += err;
                if (err != loop_len)
                        goto recv_free_err;
        }
recv_free_err:
        mutex_unlock(&ep->recvlock);
        kfree(tmp);
recv_err:
        return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        if (!ep->remote_dev)
                return -ENOTCONN;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_send().
         */
        if (flags & SCIF_SEND_BLOCK)
                mutex_lock(&ep->sendlock);

        ret = _scif_send(epd, msg, len, flags);

        if (flags & SCIF_SEND_BLOCK)
                mutex_unlock(&ep->sendlock);
        return ret;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_recv().
         */
        if (flags & SCIF_RECV_BLOCK)
                mutex_lock(&ep->recvlock);

        ret = _scif_recv(epd, msg, len, flags);

        if (flags & SCIF_RECV_BLOCK)
                mutex_unlock(&ep->recvlock);

        return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
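
/*
 * Illustrative sketch (not part of the driver): blocking kernel-mode
 * send/recv on a connected endpoint. The message contents and sizes are
 * hypothetical; scif_send()/scif_recv() return the number of bytes moved
 * or a negative errno.
 */
#if 0
static int scif_send_recv_example(scif_epd_t epd)
{
        char buf[64] = "ping";
        int ret;

        /* SCIF_SEND_BLOCK: wait until the whole message fits in the RB */
        ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
        if (ret < 0)
                return ret;

        /* SCIF_RECV_BLOCK: wait until sizeof(buf) bytes have arrived */
        ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
        return ret < 0 ? ret : 0;
}
#endif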

static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
                                   poll_table *p, struct scif_endpt *ep)
{
        /*
         * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
         * and regrab it afterwards. Because the endpoint state might have
         * changed while the lock was given up, the state must be checked
         * again after re-acquiring the lock. The code in __scif_pollfd(..)
         * does this.
         */
        spin_unlock(&ep->lock);
        poll_wait(f, wq, p);
        spin_lock(&ep->lock);
}

unsigned int
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
        unsigned int mask = 0;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

        spin_lock(&ep->lock);

        /* Endpoint is waiting for a non-blocking connect to complete */
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        if (ep->state == SCIFEP_CONNECTED ||
                            ep->state == SCIFEP_DISCONNECTED ||
                            ep->conn_err)
                                mask |= POLLOUT;
                        goto exit;
                }
        }

        /* Endpoint is listening for incoming connection requests */
        if (ep->state == SCIFEP_LISTENING) {
                _scif_poll_wait(f, &ep->conwq, wait, ep);
                if (ep->state == SCIFEP_LISTENING) {
                        if (ep->conreqcnt)
                                mask |= POLLIN;
                        goto exit;
                }
        }

        /* Endpoint is connected or disconnected */
        if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
                if (poll_requested_events(wait) & POLLIN)
                        _scif_poll_wait(f, &ep->recvwq, wait, ep);
                if (poll_requested_events(wait) & POLLOUT)
                        _scif_poll_wait(f, &ep->sendwq, wait, ep);
                if (ep->state == SCIFEP_CONNECTED ||
                    ep->state == SCIFEP_DISCONNECTED) {
                        /* Data can be read without blocking */
                        if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
                                mask |= POLLIN;
                        /* Data can be written without blocking */
                        if (scif_rb_space(&ep->qp_info.qp->outbound_q))
                                mask |= POLLOUT;
                        /* Return POLLHUP if endpoint is disconnected */
                        if (ep->state == SCIFEP_DISCONNECTED)
                                mask |= POLLHUP;
                        goto exit;
                }
        }

        /* Return POLLERR if the endpoint is in none of the above states */
        mask |= POLLERR;
exit:
        spin_unlock(&ep->lock);
        return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *        and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs; a negative value implies an infinite
 *                 timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
        struct poll_wqueues table;
        poll_table *pt;
        int i, mask, count = 0, timed_out = timeout_msecs == 0;
        u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
                : msecs_to_jiffies(timeout_msecs);

        poll_initwait(&table);
        pt = &table.pt;
        while (1) {
                for (i = 0; i < nfds; i++) {
                        pt->_key = ufds[i].events | POLLERR | POLLHUP;
                        mask = __scif_pollfd(ufds[i].epd->anon,
                                             pt, ufds[i].epd);
                        mask &= ufds[i].events | POLLERR | POLLHUP;
                        if (mask) {
                                count++;
                                pt->_qproc = NULL;
                        }
                        ufds[i].revents = mask;
                }
                pt->_qproc = NULL;
                if (!count) {
                        count = table.error;
                        if (signal_pending(current))
                                count = -EINTR;
                }
                if (count || timed_out)
                        break;

                if (!schedule_timeout_interruptible(timeout))
                        timed_out = 1;
        }
        poll_freewait(&table);
        return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
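
/*
 * Illustrative sketch (not part of the driver): polling two endpoints from
 * kernel mode. The endpoints and the 1000 ms timeout are hypothetical.
 */
#if 0
static int scif_poll_example(scif_epd_t epd_a, scif_epd_t epd_b)
{
        struct scif_pollepd ufds[2] = {
                { .epd = epd_a, .events = POLLIN },
                { .epd = epd_b, .events = POLLOUT },
        };
        int ready;

        /* Returns the number of endpoints with pending events, 0 on timeout */
        ready = scif_poll(ufds, 2, 1000);
        if (ready > 0 && (ufds[0].revents & POLLIN))
                pr_info("epd_a has data to read\n");
        return ready;
}
#endif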

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
        int online = 0;
        int offset = 0;
        int node;

        if (!scif_is_mgmt_node())
                scif_get_node_info();

        *self = scif_info.nodeid;
        mutex_lock(&scif_info.conflock);
        len = min_t(int, len, scif_info.total);
        for (node = 0; node <= scif_info.maxid; node++) {
                if (_scifdev_alive(&scif_dev[node])) {
                        online++;
                        if (offset < len)
                                nodes[offset++] = node;
                }
        }
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
                scif_info.total, online, offset);
        mutex_unlock(&scif_info.conflock);

        return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
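
/*
 * Illustrative sketch (not part of the driver): enumerating online SCIF
 * nodes. The array size of 32 is a hypothetical upper bound.
 */
#if 0
static void scif_node_ids_example(void)
{
        u16 nodes[32];
        u16 self;
        int online, i;

        /* Returns the number of online nodes; fills at most 32 entries */
        online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
        pr_info("self is node %u, %d node(s) online\n", self, online);
        for (i = 0; i < min_t(int, online, ARRAY_SIZE(nodes)); i++)
                pr_info("online node: %u\n", nodes[i]);
}
#endif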

static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->probe)
                client->probe(spdev);
        return 0;
}

static void scif_remove_client_dev(struct device *dev,
                                   struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->remove)
                client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
        subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
        struct subsys_interface *si = &client->si;

        si->name = client->name;
        si->subsys = &scif_peer_bus;
        si->add_dev = scif_add_client_dev;
        si->remove_dev = scif_remove_client_dev;

        return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);

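/*
 * Illustrative sketch (not part of the driver): a minimal SCIF client that
 * is notified as peer devices come and go. The client name and callbacks
 * are hypothetical, and the dnode field is assumed from struct scif_peer_dev.
 */
#if 0
static void example_probe(struct scif_peer_dev *spdev)
{
        pr_info("SCIF peer node %u came online\n", spdev->dnode);
}

static void example_remove(struct scif_peer_dev *spdev)
{
        pr_info("SCIF peer node %u went away\n", spdev->dnode);
}

static struct scif_client example_client = {
        .name = "scif_example",
        .probe = example_probe,
        .remove = example_remove,
};

/* Typically called from module init/exit */
static int __init example_init(void)
{
        return scif_client_register(&example_client);
}

static void __exit example_exit(void)
{
        scif_client_unregister(&example_client);
}
#endif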