linux/net/smc/af_smc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
   6 *  applies to SOCK_STREAM sockets only
   7 *  offers an alternative communication option for TCP-protocol sockets
   8 *  applicable with RoCE-cards only
   9 *
  10 *  Initial restrictions:
  11 *    - support for alternate links postponed
  12 *
  13 *  Copyright IBM Corp. 2016, 2018
  14 *
  15 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  16 *              based on prototype from Frank Blaschka
  17 */
  18
  19#define KMSG_COMPONENT "smc"
  20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  21
  22#include <linux/module.h>
  23#include <linux/socket.h>
  24#include <linux/workqueue.h>
  25#include <linux/in.h>
  26#include <linux/sched/signal.h>
  27#include <linux/if_vlan.h>
  28#include <linux/rcupdate_wait.h>
  29
  30#include <net/sock.h>
  31#include <net/tcp.h>
  32#include <net/smc.h>
  33#include <asm/ioctls.h>
  34
  35#include <net/net_namespace.h>
  36#include <net/netns/generic.h>
  37#include "smc_netns.h"
  38
  39#include "smc.h"
  40#include "smc_clc.h"
  41#include "smc_llc.h"
  42#include "smc_cdc.h"
  43#include "smc_core.h"
  44#include "smc_ib.h"
  45#include "smc_ism.h"
  46#include "smc_pnet.h"
  47#include "smc_tx.h"
  48#include "smc_rx.h"
  49#include "smc_close.h"
  50
  51static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link group
  52                                                 * creation on server
  53                                                 */
  54static DEFINE_MUTEX(smc_client_lgr_pending);    /* serialize link group
  55                                                 * creation on client
  56                                                 */
  57
  58static void smc_tcp_listen_work(struct work_struct *);
  59static void smc_connect_work(struct work_struct *);
  60
  61static void smc_set_keepalive(struct sock *sk, int val)
  62{
  63        struct smc_sock *smc = smc_sk(sk);
  64
  65        smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
  66}
  67
  68static struct smc_hashinfo smc_v4_hashinfo = {
  69        .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
  70};
  71
  72static struct smc_hashinfo smc_v6_hashinfo = {
  73        .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
  74};
  75
  76int smc_hash_sk(struct sock *sk)
  77{
  78        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  79        struct hlist_head *head;
  80
  81        head = &h->ht;
  82
  83        write_lock_bh(&h->lock);
  84        sk_add_node(sk, head);
  85        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  86        write_unlock_bh(&h->lock);
  87
  88        return 0;
  89}
  90EXPORT_SYMBOL_GPL(smc_hash_sk);
  91
  92void smc_unhash_sk(struct sock *sk)
  93{
  94        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  95
  96        write_lock_bh(&h->lock);
  97        if (sk_del_node_init(sk))
  98                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  99        write_unlock_bh(&h->lock);
 100}
 101EXPORT_SYMBOL_GPL(smc_unhash_sk);
 102
 103struct proto smc_proto = {
 104        .name           = "SMC",
 105        .owner          = THIS_MODULE,
 106        .keepalive      = smc_set_keepalive,
 107        .hash           = smc_hash_sk,
 108        .unhash         = smc_unhash_sk,
 109        .obj_size       = sizeof(struct smc_sock),
 110        .h.smc_hash     = &smc_v4_hashinfo,
 111        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 112};
 113EXPORT_SYMBOL_GPL(smc_proto);
 114
 115struct proto smc_proto6 = {
 116        .name           = "SMC6",
 117        .owner          = THIS_MODULE,
 118        .keepalive      = smc_set_keepalive,
 119        .hash           = smc_hash_sk,
 120        .unhash         = smc_unhash_sk,
 121        .obj_size       = sizeof(struct smc_sock),
 122        .h.smc_hash     = &smc_v6_hashinfo,
 123        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 124};
 125EXPORT_SYMBOL_GPL(smc_proto6);
 126
 127static void smc_restore_fallback_changes(struct smc_sock *smc)
 128{
 129        if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
 130                smc->clcsock->file->private_data = smc->sk.sk_socket;
 131                smc->clcsock->file = NULL;
 132        }
 133}
 134
 135static int __smc_release(struct smc_sock *smc)
 136{
 137        struct sock *sk = &smc->sk;
 138        int rc = 0;
 139
 140        if (!smc->use_fallback) {
 141                rc = smc_close_active(smc);
 142                sock_set_flag(sk, SOCK_DEAD);
 143                sk->sk_shutdown |= SHUTDOWN_MASK;
 144        } else {
 145                if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
 146                        sock_put(sk); /* passive closing */
 147                if (sk->sk_state == SMC_LISTEN) {
 148                        /* wake up clcsock accept */
 149                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
 150                }
 151                sk->sk_state = SMC_CLOSED;
 152                sk->sk_state_change(sk);
 153                smc_restore_fallback_changes(smc);
 154        }
 155
 156        sk->sk_prot->unhash(sk);
 157
 158        if (sk->sk_state == SMC_CLOSED) {
 159                if (smc->clcsock) {
 160                        release_sock(sk);
 161                        smc_clcsock_release(smc);
 162                        lock_sock(sk);
 163                }
 164                if (!smc->use_fallback)
 165                        smc_conn_free(&smc->conn);
 166        }
 167
 168        return rc;
 169}
 170
 171static int smc_release(struct socket *sock)
 172{
 173        struct sock *sk = sock->sk;
 174        struct smc_sock *smc;
 175        int rc = 0;
 176
 177        if (!sk)
 178                goto out;
 179
 180        sock_hold(sk); /* sock_put below */
 181        smc = smc_sk(sk);
 182
 183        /* cleanup for a dangling non-blocking connect */
 184        if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
 185                tcp_abort(smc->clcsock->sk, ECONNABORTED);
 186        flush_work(&smc->connect_work);
 187
 188        if (sk->sk_state == SMC_LISTEN)
 189                /* smc_close_non_accepted() is called and acquires
 190                 * sock lock for child sockets again
 191                 */
 192                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 193        else
 194                lock_sock(sk);
 195
 196        rc = __smc_release(smc);
 197
 198        /* detach socket */
 199        sock_orphan(sk);
 200        sock->sk = NULL;
 201        release_sock(sk);
 202
 203        sock_put(sk); /* sock_hold above */
 204        sock_put(sk); /* final sock_put */
 205out:
 206        return rc;
 207}
 208
 209static void smc_destruct(struct sock *sk)
 210{
 211        if (sk->sk_state != SMC_CLOSED)
 212                return;
 213        if (!sock_flag(sk, SOCK_DEAD))
 214                return;
 215
 216        sk_refcnt_debug_dec(sk);
 217}
 218
 219static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 220                                   int protocol)
 221{
 222        struct smc_sock *smc;
 223        struct proto *prot;
 224        struct sock *sk;
 225
 226        prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
 227        sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
 228        if (!sk)
 229                return NULL;
 230
 231        sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
 232        sk->sk_state = SMC_INIT;
 233        sk->sk_destruct = smc_destruct;
 234        sk->sk_protocol = protocol;
 235        smc = smc_sk(sk);
 236        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 237        INIT_WORK(&smc->connect_work, smc_connect_work);
 238        INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 239        INIT_LIST_HEAD(&smc->accept_q);
 240        spin_lock_init(&smc->accept_q_lock);
 241        spin_lock_init(&smc->conn.send_lock);
 242        sk->sk_prot->hash(sk);
 243        sk_refcnt_debug_inc(sk);
 244        mutex_init(&smc->clcsock_release_lock);
 245
 246        return sk;
 247}
 248
 249static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
 250                    int addr_len)
 251{
 252        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 253        struct sock *sk = sock->sk;
 254        struct smc_sock *smc;
 255        int rc;
 256
 257        smc = smc_sk(sk);
 258
 259        /* replicate tests from inet_bind(), to be safe wrt. future changes */
 260        rc = -EINVAL;
 261        if (addr_len < sizeof(struct sockaddr_in))
 262                goto out;
 263
 264        rc = -EAFNOSUPPORT;
 265        if (addr->sin_family != AF_INET &&
 266            addr->sin_family != AF_INET6 &&
 267            addr->sin_family != AF_UNSPEC)
 268                goto out;
 269        /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
 270        if (addr->sin_family == AF_UNSPEC &&
 271            addr->sin_addr.s_addr != htonl(INADDR_ANY))
 272                goto out;
 273
 274        lock_sock(sk);
 275
 276        /* Check if socket is already active */
 277        rc = -EINVAL;
 278        if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
 279                goto out_rel;
 280
 281        smc->clcsock->sk->sk_reuse = sk->sk_reuse;
 282        rc = kernel_bind(smc->clcsock, uaddr, addr_len);
 283
 284out_rel:
 285        release_sock(sk);
 286out:
 287        return rc;
 288}
 289
 290static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 291                                   unsigned long mask)
 292{
 293        /* options we don't get control via setsockopt for */
 294        nsk->sk_type = osk->sk_type;
 295        nsk->sk_sndbuf = osk->sk_sndbuf;
 296        nsk->sk_rcvbuf = osk->sk_rcvbuf;
 297        nsk->sk_sndtimeo = osk->sk_sndtimeo;
 298        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
 299        nsk->sk_mark = osk->sk_mark;
 300        nsk->sk_priority = osk->sk_priority;
 301        nsk->sk_rcvlowat = osk->sk_rcvlowat;
 302        nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
 303        nsk->sk_err = osk->sk_err;
 304
 305        nsk->sk_flags &= ~mask;
 306        nsk->sk_flags |= osk->sk_flags & mask;
 307}
 308
 309#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
 310                             (1UL << SOCK_KEEPOPEN) | \
 311                             (1UL << SOCK_LINGER) | \
 312                             (1UL << SOCK_BROADCAST) | \
 313                             (1UL << SOCK_TIMESTAMP) | \
 314                             (1UL << SOCK_DBG) | \
 315                             (1UL << SOCK_RCVTSTAMP) | \
 316                             (1UL << SOCK_RCVTSTAMPNS) | \
 317                             (1UL << SOCK_LOCALROUTE) | \
 318                             (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
 319                             (1UL << SOCK_RXQ_OVFL) | \
 320                             (1UL << SOCK_WIFI_STATUS) | \
 321                             (1UL << SOCK_NOFCS) | \
 322                             (1UL << SOCK_FILTER_LOCKED) | \
 323                             (1UL << SOCK_TSTAMP_NEW))
 324/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 325 * clc socket (since smc is not called for these options from net/core)
 326 */
 327static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 328{
 329        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
 330}
 331
 332#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
 333                             (1UL << SOCK_KEEPOPEN) | \
 334                             (1UL << SOCK_LINGER) | \
 335                             (1UL << SOCK_DBG))
 336/* copy only settings and flags relevant for smc from clc to smc socket */
 337static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 338{
 339        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 340}
 341
 342/* register the new rmb on all links */
 343static int smcr_lgr_reg_rmbs(struct smc_link *link,
 344                             struct smc_buf_desc *rmb_desc)
 345{
 346        struct smc_link_group *lgr = link->lgr;
 347        int i, rc = 0;
 348
 349        rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
 350        if (rc)
 351                return rc;
 352        /* protect against parallel smc_llc_cli_rkey_exchange() and
 353         * parallel smcr_link_reg_rmb()
 354         */
 355        mutex_lock(&lgr->llc_conf_mutex);
 356        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 357                if (!smc_link_active(&lgr->lnk[i]))
 358                        continue;
 359                rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
 360                if (rc)
 361                        goto out;
 362        }
 363
 364        /* exchange confirm_rkey msg with peer */
 365        rc = smc_llc_do_confirm_rkey(link, rmb_desc);
 366        if (rc) {
 367                rc = -EFAULT;
 368                goto out;
 369        }
 370        rmb_desc->is_conf_rkey = true;
 371out:
 372        mutex_unlock(&lgr->llc_conf_mutex);
 373        smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 374        return rc;
 375}
 376
 377static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 378{
 379        struct smc_link *link = smc->conn.lnk;
 380        struct smc_llc_qentry *qentry;
 381        int rc;
 382
 383        /* receive CONFIRM LINK request from server over RoCE fabric */
 384        qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
 385                              SMC_LLC_CONFIRM_LINK);
 386        if (!qentry) {
 387                struct smc_clc_msg_decline dclc;
 388
 389                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 390                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 391                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 392        }
 393        smc_llc_save_peer_uid(qentry);
 394        rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
 395        smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
 396        if (rc)
 397                return SMC_CLC_DECL_RMBE_EC;
 398
 399        rc = smc_ib_modify_qp_rts(link);
 400        if (rc)
 401                return SMC_CLC_DECL_ERR_RDYLNK;
 402
 403        smc_wr_remember_qp_attr(link);
 404
 405        if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
 406                return SMC_CLC_DECL_ERR_REGRMB;
 407
 408        /* confirm_rkey is implicit on 1st contact */
 409        smc->conn.rmb_desc->is_conf_rkey = true;
 410
 411        /* send CONFIRM LINK response over RoCE fabric */
 412        rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 413        if (rc < 0)
 414                return SMC_CLC_DECL_TIMEOUT_CL;
 415
 416        smc_llc_link_active(link);
 417        smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
 418
 419        /* optional 2nd link, receive ADD LINK request from server */
 420        qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
 421                              SMC_LLC_ADD_LINK);
 422        if (!qentry) {
 423                struct smc_clc_msg_decline dclc;
 424
 425                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 426                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 427                if (rc == -EAGAIN)
 428                        rc = 0; /* no DECLINE received, go with one link */
 429                return rc;
 430        }
 431        smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
 432        smc_llc_cli_add_link(link, qentry);
 433        return 0;
 434}
 435
 436static void smcr_conn_save_peer_info(struct smc_sock *smc,
 437                                     struct smc_clc_msg_accept_confirm *clc)
 438{
 439        int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 440
 441        smc->conn.peer_rmbe_idx = clc->rmbe_idx;
 442        smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
 443        smc->conn.peer_rmbe_size = bufsize;
 444        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 445        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 446}
 447
 448static void smcd_conn_save_peer_info(struct smc_sock *smc,
 449                                     struct smc_clc_msg_accept_confirm *clc)
 450{
 451        int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
 452
 453        smc->conn.peer_rmbe_idx = clc->dmbe_idx;
 454        smc->conn.peer_token = clc->token;
 455        /* msg header takes up space in the buffer */
 456        smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
 457        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 458        smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
 459}
 460
 461static void smc_conn_save_peer_info(struct smc_sock *smc,
 462                                    struct smc_clc_msg_accept_confirm *clc)
 463{
 464        if (smc->conn.lgr->is_smcd)
 465                smcd_conn_save_peer_info(smc, clc);
 466        else
 467                smcr_conn_save_peer_info(smc, clc);
 468}
 469
 470static void smc_link_save_peer_info(struct smc_link *link,
 471                                    struct smc_clc_msg_accept_confirm *clc)
 472{
 473        link->peer_qpn = ntoh24(clc->qpn);
 474        memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
 475        memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
 476        link->peer_psn = ntoh24(clc->psn);
 477        link->peer_mtu = clc->qp_mtu;
 478}
 479
 480static void smc_switch_to_fallback(struct smc_sock *smc)
 481{
 482        smc->use_fallback = true;
 483        if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
 484                smc->clcsock->file = smc->sk.sk_socket->file;
 485                smc->clcsock->file->private_data = smc->clcsock;
 486                smc->clcsock->wq.fasync_list =
 487                        smc->sk.sk_socket->wq.fasync_list;
 488        }
 489}
 490
 491/* fall back during connect */
 492static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 493{
 494        smc_switch_to_fallback(smc);
 495        smc->fallback_rsn = reason_code;
 496        smc_copy_sock_settings_to_clc(smc);
 497        smc->connect_nonblock = 0;
 498        if (smc->sk.sk_state == SMC_INIT)
 499                smc->sk.sk_state = SMC_ACTIVE;
 500        return 0;
 501}
 502
 503/* decline and fall back during connect */
 504static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 505{
 506        int rc;
 507
 508        if (reason_code < 0) { /* error, fallback is not possible */
 509                if (smc->sk.sk_state == SMC_INIT)
 510                        sock_put(&smc->sk); /* passive closing */
 511                return reason_code;
 512        }
 513        if (reason_code != SMC_CLC_DECL_PEERDECL) {
 514                rc = smc_clc_send_decline(smc, reason_code);
 515                if (rc < 0) {
 516                        if (smc->sk.sk_state == SMC_INIT)
 517                                sock_put(&smc->sk); /* passive closing */
 518                        return rc;
 519                }
 520        }
 521        return smc_connect_fallback(smc, reason_code);
 522}
 523
 524/* abort connecting */
 525static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 526                             int local_contact)
 527{
 528        bool is_smcd = smc->conn.lgr->is_smcd;
 529
 530        if (local_contact == SMC_FIRST_CONTACT)
 531                smc_lgr_cleanup_early(&smc->conn);
 532        else
 533                smc_conn_free(&smc->conn);
 534        if (is_smcd)
 535                /* there is only one lgr role for SMC-D; use server lock */
 536                mutex_unlock(&smc_server_lgr_pending);
 537        else
 538                mutex_unlock(&smc_client_lgr_pending);
 539
 540        smc->connect_nonblock = 0;
 541        return reason_code;
 542}
 543
 544/* check if there is a rdma device available for this connection. */
 545/* called for connect and listen */
 546static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
 547{
 548        /* PNET table look up: search active ib_device and port
 549         * within same PNETID that also contains the ethernet device
 550         * used for the internal TCP socket
 551         */
 552        smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
 553        if (!ini->ib_dev)
 554                return SMC_CLC_DECL_NOSMCRDEV;
 555        return 0;
 556}
 557
 558/* check if there is an ISM device available for this connection. */
 559/* called for connect and listen */
 560static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
 561{
 562        /* Find ISM device with same PNETID as connecting interface  */
 563        smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
 564        if (!ini->ism_dev)
 565                return SMC_CLC_DECL_NOSMCDDEV;
 566        return 0;
 567}
 568
 569/* Check for VLAN ID and register it on ISM device just for CLC handshake */
 570static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
 571                                      struct smc_init_info *ini)
 572{
 573        if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
 574                return SMC_CLC_DECL_ISMVLANERR;
 575        return 0;
 576}
 577
 578/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 579 * used, the VLAN ID will be registered again during the connection setup.
 580 */
 581static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
 582                                        struct smc_init_info *ini)
 583{
 584        if (!is_smcd)
 585                return 0;
 586        if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
 587                return SMC_CLC_DECL_CNFERR;
 588        return 0;
 589}
 590
 591/* CLC handshake during connect */
 592static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 593                           struct smc_clc_msg_accept_confirm *aclc,
 594                           struct smc_init_info *ini)
 595{
 596        int rc = 0;
 597
 598        /* do inband token exchange */
 599        rc = smc_clc_send_proposal(smc, smc_type, ini);
 600        if (rc)
 601                return rc;
 602        /* receive SMC Accept CLC message */
 603        return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
 604                                CLC_WAIT_TIME);
 605}
 606
 607/* setup for RDMA connection of client */
 608static int smc_connect_rdma(struct smc_sock *smc,
 609                            struct smc_clc_msg_accept_confirm *aclc,
 610                            struct smc_init_info *ini)
 611{
 612        int i, reason_code = 0;
 613        struct smc_link *link;
 614
 615        ini->is_smcd = false;
 616        ini->ib_lcl = &aclc->lcl;
 617        ini->ib_clcqpn = ntoh24(aclc->qpn);
 618        ini->srv_first_contact = aclc->hdr.flag;
 619
 620        mutex_lock(&smc_client_lgr_pending);
 621        reason_code = smc_conn_create(smc, ini);
 622        if (reason_code) {
 623                mutex_unlock(&smc_client_lgr_pending);
 624                return reason_code;
 625        }
 626
 627        smc_conn_save_peer_info(smc, aclc);
 628
 629        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 630                link = smc->conn.lnk;
 631        } else {
 632                /* set link that was assigned by server */
 633                link = NULL;
 634                for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 635                        struct smc_link *l = &smc->conn.lgr->lnk[i];
 636
 637                        if (l->peer_qpn == ntoh24(aclc->qpn) &&
 638                            !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) &&
 639                            !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) {
 640                                link = l;
 641                                break;
 642                        }
 643                }
 644                if (!link)
 645                        return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
 646                                                 ini->cln_first_contact);
 647                smc->conn.lnk = link;
 648        }
 649
 650        /* create send buffer and rmb */
 651        if (smc_buf_create(smc, false))
 652                return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
 653                                         ini->cln_first_contact);
 654
 655        if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 656                smc_link_save_peer_info(link, aclc);
 657
 658        if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
 659                return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 660                                         ini->cln_first_contact);
 661
 662        smc_close_init(smc);
 663        smc_rx_init(smc);
 664
 665        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 666                if (smc_ib_ready_link(link))
 667                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 668                                                 ini->cln_first_contact);
 669        } else {
 670                if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
 671                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 672                                                 ini->cln_first_contact);
 673        }
 674        smc_rmb_sync_sg_for_device(&smc->conn);
 675
 676        reason_code = smc_clc_send_confirm(smc);
 677        if (reason_code)
 678                return smc_connect_abort(smc, reason_code,
 679                                         ini->cln_first_contact);
 680
 681        smc_tx_init(smc);
 682
 683        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 684                /* QP confirmation over RoCE fabric */
 685                smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
 686                reason_code = smcr_clnt_conf_first_link(smc);
 687                smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
 688                if (reason_code)
 689                        return smc_connect_abort(smc, reason_code,
 690                                                 ini->cln_first_contact);
 691        }
 692        mutex_unlock(&smc_client_lgr_pending);
 693
 694        smc_copy_sock_settings_to_clc(smc);
 695        smc->connect_nonblock = 0;
 696        if (smc->sk.sk_state == SMC_INIT)
 697                smc->sk.sk_state = SMC_ACTIVE;
 698
 699        return 0;
 700}
 701
 702/* setup for ISM connection of client */
 703static int smc_connect_ism(struct smc_sock *smc,
 704                           struct smc_clc_msg_accept_confirm *aclc,
 705                           struct smc_init_info *ini)
 706{
 707        int rc = 0;
 708
 709        ini->is_smcd = true;
 710        ini->ism_gid = aclc->gid;
 711        ini->srv_first_contact = aclc->hdr.flag;
 712
 713        /* there is only one lgr role for SMC-D; use server lock */
 714        mutex_lock(&smc_server_lgr_pending);
 715        rc = smc_conn_create(smc, ini);
 716        if (rc) {
 717                mutex_unlock(&smc_server_lgr_pending);
 718                return rc;
 719        }
 720
 721        /* Create send and receive buffers */
 722        rc = smc_buf_create(smc, true);
 723        if (rc)
 724                return smc_connect_abort(smc, (rc == -ENOSPC) ?
 725                                              SMC_CLC_DECL_MAX_DMB :
 726                                              SMC_CLC_DECL_MEM,
 727                                         ini->cln_first_contact);
 728
 729        smc_conn_save_peer_info(smc, aclc);
 730        smc_close_init(smc);
 731        smc_rx_init(smc);
 732        smc_tx_init(smc);
 733
 734        rc = smc_clc_send_confirm(smc);
 735        if (rc)
 736                return smc_connect_abort(smc, rc, ini->cln_first_contact);
 737        mutex_unlock(&smc_server_lgr_pending);
 738
 739        smc_copy_sock_settings_to_clc(smc);
 740        smc->connect_nonblock = 0;
 741        if (smc->sk.sk_state == SMC_INIT)
 742                smc->sk.sk_state = SMC_ACTIVE;
 743
 744        return 0;
 745}
 746
 747/* perform steps before actually connecting */
 748static int __smc_connect(struct smc_sock *smc)
 749{
 750        bool ism_supported = false, rdma_supported = false;
 751        struct smc_clc_msg_accept_confirm aclc;
 752        struct smc_init_info ini = {0};
 753        int smc_type;
 754        int rc = 0;
 755
 756        if (smc->use_fallback)
 757                return smc_connect_fallback(smc, smc->fallback_rsn);
 758
 759        /* if peer has not signalled SMC-capability, fall back */
 760        if (!tcp_sk(smc->clcsock->sk)->syn_smc)
 761                return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 762
 763        /* IPSec connections opt out of SMC-R optimizations */
 764        if (using_ipsec(smc))
 765                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 766
 767        /* get vlan id from IP device */
 768        if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
 769                return smc_connect_decline_fallback(smc,
 770                                                    SMC_CLC_DECL_GETVLANERR);
 771
 772        /* check if there is an ism device available */
 773        if (!smc_find_ism_device(smc, &ini) &&
 774            !smc_connect_ism_vlan_setup(smc, &ini)) {
 775                /* ISM is supported for this connection */
 776                ism_supported = true;
 777                smc_type = SMC_TYPE_D;
 778        }
 779
 780        /* check if there is a rdma device available */
 781        if (!smc_find_rdma_device(smc, &ini)) {
 782                /* RDMA is supported for this connection */
 783                rdma_supported = true;
 784                if (ism_supported)
 785                        smc_type = SMC_TYPE_B; /* both */
 786                else
 787                        smc_type = SMC_TYPE_R; /* only RDMA */
 788        }
 789
 790        /* if neither ISM nor RDMA are supported, fallback */
 791        if (!rdma_supported && !ism_supported)
 792                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 793
 794        /* perform CLC handshake */
 795        rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
 796        if (rc) {
 797                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 798                return smc_connect_decline_fallback(smc, rc);
 799        }
 800
 801        /* depending on previous steps, connect using rdma or ism */
 802        if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
 803                rc = smc_connect_rdma(smc, &aclc, &ini);
 804        else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
 805                rc = smc_connect_ism(smc, &aclc, &ini);
 806        else
 807                rc = SMC_CLC_DECL_MODEUNSUPP;
 808        if (rc) {
 809                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 810                return smc_connect_decline_fallback(smc, rc);
 811        }
 812
 813        smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 814        return 0;
 815}
 816
 817static void smc_connect_work(struct work_struct *work)
 818{
 819        struct smc_sock *smc = container_of(work, struct smc_sock,
 820                                            connect_work);
 821        long timeo = smc->sk.sk_sndtimeo;
 822        int rc = 0;
 823
 824        if (!timeo)
 825                timeo = MAX_SCHEDULE_TIMEOUT;
 826        lock_sock(smc->clcsock->sk);
 827        if (smc->clcsock->sk->sk_err) {
 828                smc->sk.sk_err = smc->clcsock->sk->sk_err;
 829        } else if ((1 << smc->clcsock->sk->sk_state) &
 830                                        (TCPF_SYN_SENT | TCP_SYN_RECV)) {
 831                rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
 832                if ((rc == -EPIPE) &&
 833                    ((1 << smc->clcsock->sk->sk_state) &
 834                                        (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
 835                        rc = 0;
 836        }
 837        release_sock(smc->clcsock->sk);
 838        lock_sock(&smc->sk);
 839        if (rc != 0 || smc->sk.sk_err) {
 840                smc->sk.sk_state = SMC_CLOSED;
 841                if (rc == -EPIPE || rc == -EAGAIN)
 842                        smc->sk.sk_err = EPIPE;
 843                else if (signal_pending(current))
 844                        smc->sk.sk_err = -sock_intr_errno(timeo);
 845                sock_put(&smc->sk); /* passive closing */
 846                goto out;
 847        }
 848
 849        rc = __smc_connect(smc);
 850        if (rc < 0)
 851                smc->sk.sk_err = -rc;
 852
 853out:
 854        if (!sock_flag(&smc->sk, SOCK_DEAD)) {
 855                if (smc->sk.sk_err) {
 856                        smc->sk.sk_state_change(&smc->sk);
 857                } else { /* allow polling before and after fallback decision */
 858                        smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
 859                        smc->sk.sk_write_space(&smc->sk);
 860                }
 861        }
 862        release_sock(&smc->sk);
 863}
 864
/* connect() handler of the SMC socket.
 *
 * Starts the TCP connect on the internal CLC socket and then either runs
 * the SMC connection setup synchronously via __smc_connect() or, with
 * O_NONBLOCK, hands it over to smc_connect_work().
 *
 * Returns 0 on success, -EINPROGRESS when the nonblocking path was taken,
 * -EALREADY if a nonblocking connect is already pending, -EISCONN in state
 * SMC_ACTIVE, -EINVAL for bad parameters or any other state, or the
 * negative result of kernel_connect()/__smc_connect().
 */
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		/* rc is still -EINVAL here */
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		/* connect_work was scheduled by an earlier call */
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	sock_hold(&smc->sk); /* sock put in passive closing */
	/* NOTE(review): the reference above is also taken when use_fallback
	 * is already set and the SMC setup below is skipped - confirm that
	 * the release path of a fallback socket drops it again.
	 */
	if (smc->use_fallback)
		goto out;
	if (flags & O_NONBLOCK) {
		/* setup continues asynchronously in smc_connect_work() */
		if (schedule_work(&smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}
 922
/* Accept one connection on the CLC socket of listen sock @lsmc and pair it
 * with a newly allocated SMC sock.
 *
 * The lock of the listen sock is released on entry and taken again before
 * returning, i.e. the function is entered and left with that lock held
 * (the caller smc_tcp_listen_work() takes it).
 *
 * On success 0 is returned and *new_smc points to the new SMC sock with its
 * clcsock set. On failure a negative errno is returned, *new_smc is NULL
 * and the error is also stored in the sk_err of the listen sock. If the
 * listen sock was closed in the meantime the new sock is discarded and
 * *new_smc is NULL, even though the return value may be 0.
 */
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	/* clcsock_release_lock guards against the CLC socket going away;
	 * without a CLC socket rc stays -EINVAL
	 */
	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if  (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		/* undo the allocation of the new sock */
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}
 963
 964/* add a just created sock to the accept queue of the listen sock as
 965 * candidate for a following socket accept call from user space
 966 */
 967static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
 968{
 969        struct smc_sock *par = smc_sk(parent);
 970
 971        sock_hold(sk); /* sock_put in smc_accept_unlink () */
 972        spin_lock(&par->accept_q_lock);
 973        list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
 974        spin_unlock(&par->accept_q_lock);
 975        sk_acceptq_added(parent);
 976}
 977
 978/* remove a socket from the accept queue of its parental listening socket */
 979static void smc_accept_unlink(struct sock *sk)
 980{
 981        struct smc_sock *par = smc_sk(sk)->listen_smc;
 982
 983        spin_lock(&par->accept_q_lock);
 984        list_del_init(&smc_sk(sk)->accept_q);
 985        spin_unlock(&par->accept_q_lock);
 986        sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
 987        sock_put(sk); /* sock_hold in smc_accept_enqueue */
 988}
 989
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 *
 * Queue entries found in state SMC_CLOSED are cleaned up and skipped.
 * Returns the first usable sock, grafted onto @new_sock if that is not
 * NULL, or NULL if the queue holds no usable entry.
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	/* _safe variant: the current entry is unlinked inside the loop */
	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			/* closed while waiting on the queue: release it */
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			if (isk->use_fallback) {
				/* in fallback mode the file of the new socket
				 * is attached to the CLC socket as well
				 */
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}
1023
1024/* clean up for a created but never accepted sock */
1025void smc_close_non_accepted(struct sock *sk)
1026{
1027        struct smc_sock *smc = smc_sk(sk);
1028
1029        sock_hold(sk); /* sock_put below */
1030        lock_sock(sk);
1031        if (!sk->sk_lingertime)
1032                /* wait for peer closing */
1033                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
1034        __smc_release(smc);
1035        release_sock(sk);
1036        sock_put(sk); /* sock_hold above */
1037        sock_put(sk); /* final sock_put */
1038}
1039
/* Server side of the LLC CONFIRM LINK exchange for the first link of a link
 * group: register the RMB on the link, send the CONFIRM LINK request and
 * wait for the response of the client. On success the link is activated,
 * the link group type is set to SMC_LGR_SINGLE and adding a second link is
 * attempted.
 *
 * Returns 0 on success, otherwise an SMC_CLC_DECL_* reason code or the
 * result of smc_clc_wait_msg().
 */
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		/* no LLC response: look for a CLC decline from the peer
		 * instead; -EAGAIN from that wait is reported as timeout
		 */
		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	/* the queue entry is deleted regardless of the evaluation result */
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link);
	return 0;
}
1080
1081/* listen worker: finish */
1082static void smc_listen_out(struct smc_sock *new_smc)
1083{
1084        struct smc_sock *lsmc = new_smc->listen_smc;
1085        struct sock *newsmcsk = &new_smc->sk;
1086
1087        if (lsmc->sk.sk_state == SMC_LISTEN) {
1088                lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1089                smc_accept_enqueue(&lsmc->sk, newsmcsk);
1090                release_sock(&lsmc->sk);
1091        } else { /* no longer listening */
1092                smc_close_non_accepted(newsmcsk);
1093        }
1094
1095        /* Wake up accept */
1096        lsmc->sk.sk_data_ready(&lsmc->sk);
1097        sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1098}
1099
1100/* listen worker: finish in state connected */
1101static void smc_listen_out_connected(struct smc_sock *new_smc)
1102{
1103        struct sock *newsmcsk = &new_smc->sk;
1104
1105        sk_refcnt_debug_inc(newsmcsk);
1106        if (newsmcsk->sk_state == SMC_INIT)
1107                newsmcsk->sk_state = SMC_ACTIVE;
1108
1109        smc_listen_out(new_smc);
1110}
1111
1112/* listen worker: finish in error state */
1113static void smc_listen_out_err(struct smc_sock *new_smc)
1114{
1115        struct sock *newsmcsk = &new_smc->sk;
1116
1117        if (newsmcsk->sk_state == SMC_INIT)
1118                sock_put(&new_smc->sk); /* passive closing */
1119        newsmcsk->sk_state = SMC_CLOSED;
1120
1121        smc_listen_out(new_smc);
1122}
1123
1124/* listen worker: decline and fall back if possible */
1125static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1126                               int local_contact)
1127{
1128        /* RDMA setup failed, switch back to TCP */
1129        if (local_contact == SMC_FIRST_CONTACT)
1130                smc_lgr_cleanup_early(&new_smc->conn);
1131        else
1132                smc_conn_free(&new_smc->conn);
1133        if (reason_code < 0) { /* error, no fallback possible */
1134                smc_listen_out_err(new_smc);
1135                return;
1136        }
1137        smc_switch_to_fallback(new_smc);
1138        new_smc->fallback_rsn = reason_code;
1139        if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1140                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1141                        smc_listen_out_err(new_smc);
1142                        return;
1143                }
1144        }
1145        smc_listen_out_connected(new_smc);
1146}
1147
1148/* listen worker: check prefixes */
1149static int smc_listen_prfx_check(struct smc_sock *new_smc,
1150                                 struct smc_clc_msg_proposal *pclc)
1151{
1152        struct smc_clc_msg_proposal_prefix *pclc_prfx;
1153        struct socket *newclcsock = new_smc->clcsock;
1154
1155        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1156        if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1157                return SMC_CLC_DECL_DIFFPREFIX;
1158
1159        return 0;
1160}
1161
1162/* listen worker: initialize connection and buffers */
1163static int smc_listen_rdma_init(struct smc_sock *new_smc,
1164                                struct smc_init_info *ini)
1165{
1166        int rc;
1167
1168        /* allocate connection / link group */
1169        rc = smc_conn_create(new_smc, ini);
1170        if (rc)
1171                return rc;
1172
1173        /* create send buffer and rmb */
1174        if (smc_buf_create(new_smc, false))
1175                return SMC_CLC_DECL_MEM;
1176
1177        return 0;
1178}
1179
1180/* listen worker: initialize connection and buffers for SMC-D */
1181static int smc_listen_ism_init(struct smc_sock *new_smc,
1182                               struct smc_clc_msg_proposal *pclc,
1183                               struct smc_init_info *ini)
1184{
1185        struct smc_clc_msg_smcd *pclc_smcd;
1186        int rc;
1187
1188        pclc_smcd = smc_get_clc_msg_smcd(pclc);
1189        ini->ism_gid = pclc_smcd->gid;
1190        rc = smc_conn_create(new_smc, ini);
1191        if (rc)
1192                return rc;
1193
1194        /* Check if peer can be reached via ISM device */
1195        if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1196                            new_smc->conn.lgr->vlan_id,
1197                            new_smc->conn.lgr->smcd)) {
1198                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1199                        smc_lgr_cleanup_early(&new_smc->conn);
1200                else
1201                        smc_conn_free(&new_smc->conn);
1202                return SMC_CLC_DECL_SMCDNOTALK;
1203        }
1204
1205        /* Create send and receive buffers */
1206        rc = smc_buf_create(new_smc, true);
1207        if (rc) {
1208                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1209                        smc_lgr_cleanup_early(&new_smc->conn);
1210                else
1211                        smc_conn_free(&new_smc->conn);
1212                return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
1213                                         SMC_CLC_DECL_MEM;
1214        }
1215
1216        return 0;
1217}
1218
1219/* listen worker: register buffers */
1220static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1221{
1222        struct smc_connection *conn = &new_smc->conn;
1223
1224        if (local_contact != SMC_FIRST_CONTACT) {
1225                if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
1226                        return SMC_CLC_DECL_ERR_REGRMB;
1227        }
1228        smc_rmb_sync_sg_for_device(&new_smc->conn);
1229
1230        return 0;
1231}
1232
/* listen worker: finish RDMA setup
 *
 * Processes the CLC confirm message @cclc of the client; on first contact
 * the link is additionally brought into ready state and confirmed via
 * smcr_serv_conf_first_link().
 *
 * Returns 0 on success. On failure smc_listen_decline() has already been
 * called and the reason code is returned, so the caller must not decline
 * a second time.
 */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		/* the flow is stopped whether or not the confirm succeeded */
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}
1267
1268/* setup for RDMA connection of server */
1269static void smc_listen_work(struct work_struct *work)
1270{
1271        struct smc_sock *new_smc = container_of(work, struct smc_sock,
1272                                                smc_listen_work);
1273        struct socket *newclcsock = new_smc->clcsock;
1274        struct smc_clc_msg_accept_confirm cclc;
1275        struct smc_clc_msg_proposal *pclc;
1276        struct smc_init_info ini = {0};
1277        bool ism_supported = false;
1278        u8 buf[SMC_CLC_MAX_LEN];
1279        int rc = 0;
1280
1281        if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
1282                return smc_listen_out_err(new_smc);
1283
1284        if (new_smc->use_fallback) {
1285                smc_listen_out_connected(new_smc);
1286                return;
1287        }
1288
1289        /* check if peer is smc capable */
1290        if (!tcp_sk(newclcsock->sk)->syn_smc) {
1291                smc_switch_to_fallback(new_smc);
1292                new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1293                smc_listen_out_connected(new_smc);
1294                return;
1295        }
1296
1297        /* do inband token exchange -
1298         * wait for and receive SMC Proposal CLC message
1299         */
1300        pclc = (struct smc_clc_msg_proposal *)&buf;
1301        rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1302                              SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
1303        if (rc)
1304                goto out_decl;
1305
1306        /* IPSec connections opt out of SMC-R optimizations */
1307        if (using_ipsec(new_smc)) {
1308                rc = SMC_CLC_DECL_IPSEC;
1309                goto out_decl;
1310        }
1311
1312        /* check for matching IP prefix and subnet length */
1313        rc = smc_listen_prfx_check(new_smc, pclc);
1314        if (rc)
1315                goto out_decl;
1316
1317        /* get vlan id from IP device */
1318        if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
1319                rc = SMC_CLC_DECL_GETVLANERR;
1320                goto out_decl;
1321        }
1322
1323        mutex_lock(&smc_server_lgr_pending);
1324        smc_close_init(new_smc);
1325        smc_rx_init(new_smc);
1326        smc_tx_init(new_smc);
1327
1328        /* check if ISM is available */
1329        if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
1330                ini.is_smcd = true; /* prepare ISM check */
1331                rc = smc_find_ism_device(new_smc, &ini);
1332                if (!rc)
1333                        rc = smc_listen_ism_init(new_smc, pclc, &ini);
1334                if (!rc)
1335                        ism_supported = true;
1336                else if (pclc->hdr.path == SMC_TYPE_D)
1337                        goto out_unlock; /* skip RDMA and decline */
1338        }
1339
1340        /* check if RDMA is available */
1341        if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
1342                /* prepare RDMA check */
1343                ini.is_smcd = false;
1344                ini.ism_dev = NULL;
1345                ini.ib_lcl = &pclc->lcl;
1346                rc = smc_find_rdma_device(new_smc, &ini);
1347                if (rc) {
1348                        /* no RDMA device found */
1349                        if (pclc->hdr.path == SMC_TYPE_B)
1350                                /* neither ISM nor RDMA device found */
1351                                rc = SMC_CLC_DECL_NOSMCDEV;
1352                        goto out_unlock;
1353                }
1354                rc = smc_listen_rdma_init(new_smc, &ini);
1355                if (rc)
1356                        goto out_unlock;
1357                rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
1358                if (rc)
1359                        goto out_unlock;
1360        }
1361
1362        /* send SMC Accept CLC message */
1363        rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
1364        if (rc)
1365                goto out_unlock;
1366
1367        /* SMC-D does not need this lock any more */
1368        if (ism_supported)
1369                mutex_unlock(&smc_server_lgr_pending);
1370
1371        /* receive SMC Confirm CLC message */
1372        rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1373                              SMC_CLC_CONFIRM, CLC_WAIT_TIME);
1374        if (rc) {
1375                if (!ism_supported)
1376                        goto out_unlock;
1377                goto out_decl;
1378        }
1379
1380        /* finish worker */
1381        if (!ism_supported) {
1382                rc = smc_listen_rdma_finish(new_smc, &cclc,
1383                                            ini.cln_first_contact);
1384                mutex_unlock(&smc_server_lgr_pending);
1385                if (rc)
1386                        return;
1387        }
1388        smc_conn_save_peer_info(new_smc, &cclc);
1389        smc_listen_out_connected(new_smc);
1390        return;
1391
1392out_unlock:
1393        mutex_unlock(&smc_server_lgr_pending);
1394out_decl:
1395        smc_listen_decline(new_smc, rc, ini.cln_first_contact);
1396}
1397
/* Worker of a listening SMC socket, scheduled by smc_listen().
 *
 * As long as the socket stays in state SMC_LISTEN, connections are accepted
 * on the CLC socket one by one, and for each of them a new SMC sock is
 * prepared and smc_listen_work() is scheduled. The loop ends when the
 * accept fails or the socket leaves state SMC_LISTEN.
 */
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		/* drops and re-takes the lock of lsk internally */
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		/* the new sock inherits fallback mode and buffer sizes of
		 * the listen sock
		 */
		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_out() */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		/* work not queued: give the reference back right away */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
1431
/* listen() handler of the SMC socket.
 *
 * Puts the internal CLC socket into listening state and schedules
 * smc_tcp_listen_work() to accept incoming connections. Calling it again
 * on a socket that is already listening only updates the backlog.
 *
 * Returns 0 on success, -EINVAL if the socket is neither in state SMC_INIT
 * nor SMC_LISTEN or a nonblocking connect is pending, or the error of
 * kernel_listen().
 */
static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		/* already listening: just adjust the backlog */
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_put in smc_tcp_listen_work() */
	/* work not queued: give the reference back right away */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}
1472
/* accept() handler of the SMC socket.
 *
 * Waits until a sock can be taken from the accept queue of the listen
 * socket and binds it to @new_sock. If a defer-accept time is set on the
 * listen socket and the call is blocking, it additionally waits up to that
 * time for data to arrive on the new socket.
 *
 * Returns 0 on success, -EINVAL if the socket is not in state SMC_LISTEN,
 * -EAGAIN if no connection is available and no wait time is left, the
 * value of sock_intr_errno() when a signal is pending, or the pending
 * error of the new sock.
 */
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		/* sleep without holding the sock lock */
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	/* nsk is only valid if the loop ended without setting rc */
	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
								MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			/* fallback: data arrives on the CLC socket */
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}
1542
1543static int smc_getname(struct socket *sock, struct sockaddr *addr,
1544                       int peer)
1545{
1546        struct smc_sock *smc;
1547
1548        if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1549            (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1550                return -ENOTCONN;
1551
1552        smc = smc_sk(sock->sk);
1553
1554        return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1555}
1556
1557static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1558{
1559        struct sock *sk = sock->sk;
1560        struct smc_sock *smc;
1561        int rc = -EPIPE;
1562
1563        smc = smc_sk(sk);
1564        lock_sock(sk);
1565        if ((sk->sk_state != SMC_ACTIVE) &&
1566            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1567            (sk->sk_state != SMC_INIT))
1568                goto out;
1569
1570        if (msg->msg_flags & MSG_FASTOPEN) {
1571                if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
1572                        smc_switch_to_fallback(smc);
1573                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1574                } else {
1575                        rc = -EINVAL;
1576                        goto out;
1577                }
1578        }
1579
1580        if (smc->use_fallback)
1581                rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1582        else
1583                rc = smc_tx_sendmsg(smc, msg, len);
1584out:
1585        release_sock(sk);
1586        return rc;
1587}
1588
1589static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1590                       int flags)
1591{
1592        struct sock *sk = sock->sk;
1593        struct smc_sock *smc;
1594        int rc = -ENOTCONN;
1595
1596        smc = smc_sk(sk);
1597        lock_sock(sk);
1598        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1599                /* socket was connected before, no more data to read */
1600                rc = 0;
1601                goto out;
1602        }
1603        if ((sk->sk_state == SMC_INIT) ||
1604            (sk->sk_state == SMC_LISTEN) ||
1605            (sk->sk_state == SMC_CLOSED))
1606                goto out;
1607
1608        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1609                rc = 0;
1610                goto out;
1611        }
1612
1613        if (smc->use_fallback) {
1614                rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1615        } else {
1616                msg->msg_namelen = 0;
1617                rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1618        }
1619
1620out:
1621        release_sock(sk);
1622        return rc;
1623}
1624
1625static __poll_t smc_accept_poll(struct sock *parent)
1626{
1627        struct smc_sock *isk = smc_sk(parent);
1628        __poll_t mask = 0;
1629
1630        spin_lock(&isk->accept_q_lock);
1631        if (!list_empty(&isk->accept_q))
1632                mask = EPOLLIN | EPOLLRDNORM;
1633        spin_unlock(&isk->accept_q_lock);
1634
1635        return mask;
1636}
1637
/* poll() handler of the SMC socket.
 *
 * In fallback mode the poll of the CLC socket is used and its sk_err is
 * copied to the SMC sock. Otherwise the event mask is derived from the
 * state of the SMC sock and of its connection.
 *
 * Returns the event mask, or EPOLLNVAL if the socket has no sock.
 */
static __poll_t smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work()*/
			/* use_fallback is checked a second time because it
			 * may have been set since the check above
			 */
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							   wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			/* writable: send buffer space outside of SMC_INIT,
			 * or sending already shut down
			 */
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				/* not writable: flag the socket as out of
				 * space
				 */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}
1690
1691static int smc_shutdown(struct socket *sock, int how)
1692{
1693        struct sock *sk = sock->sk;
1694        struct smc_sock *smc;
1695        int rc = -EINVAL;
1696        int rc1 = 0;
1697
1698        smc = smc_sk(sk);
1699
1700        if ((how < SHUT_RD) || (how > SHUT_RDWR))
1701                return rc;
1702
1703        lock_sock(sk);
1704
1705        rc = -ENOTCONN;
1706        if ((sk->sk_state != SMC_ACTIVE) &&
1707            (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1708            (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1709            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1710            (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1711            (sk->sk_state != SMC_APPFINCLOSEWAIT))
1712                goto out;
1713        if (smc->use_fallback) {
1714                rc = kernel_sock_shutdown(smc->clcsock, how);
1715                sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1716                if (sk->sk_shutdown == SHUTDOWN_MASK)
1717                        sk->sk_state = SMC_CLOSED;
1718                goto out;
1719        }
1720        switch (how) {
1721        case SHUT_RDWR:         /* shutdown in both directions */
1722                rc = smc_close_active(smc);
1723                break;
1724        case SHUT_WR:
1725                rc = smc_close_shutdown_write(smc);
1726                break;
1727        case SHUT_RD:
1728                rc = 0;
1729                /* nothing more to do because peer is not involved */
1730                break;
1731        }
1732        if (smc->clcsock)
1733                rc1 = kernel_sock_shutdown(smc->clcsock, how);
1734        /* map sock_shutdown_cmd constants to sk_shutdown value range */
1735        sk->sk_shutdown |= how + 1;
1736
1737out:
1738        release_sock(sk);
1739        return rc ? rc : rc1;
1740}
1741
1742static int smc_setsockopt(struct socket *sock, int level, int optname,
1743                          sockptr_t optval, unsigned int optlen)
1744{
1745        struct sock *sk = sock->sk;
1746        struct smc_sock *smc;
1747        int val, rc;
1748
1749        smc = smc_sk(sk);
1750
1751        /* generic setsockopts reaching us here always apply to the
1752         * CLC socket
1753         */
1754        if (unlikely(!smc->clcsock->ops->setsockopt))
1755                rc = -EOPNOTSUPP;
1756        else
1757                rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1758                                                   optval, optlen);
1759        if (smc->clcsock->sk->sk_err) {
1760                sk->sk_err = smc->clcsock->sk->sk_err;
1761                sk->sk_error_report(sk);
1762        }
1763
1764        if (optlen < sizeof(int))
1765                return -EINVAL;
1766        if (copy_from_sockptr(&val, optval, sizeof(int)))
1767                return -EFAULT;
1768
1769        lock_sock(sk);
1770        if (rc || smc->use_fallback)
1771                goto out;
1772        switch (optname) {
1773        case TCP_ULP:
1774        case TCP_FASTOPEN:
1775        case TCP_FASTOPEN_CONNECT:
1776        case TCP_FASTOPEN_KEY:
1777        case TCP_FASTOPEN_NO_COOKIE:
1778                /* option not supported by SMC */
1779                if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
1780                        smc_switch_to_fallback(smc);
1781                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1782                } else {
1783                        rc = -EINVAL;
1784                }
1785                break;
1786        case TCP_NODELAY:
1787                if (sk->sk_state != SMC_INIT &&
1788                    sk->sk_state != SMC_LISTEN &&
1789                    sk->sk_state != SMC_CLOSED) {
1790                        if (val)
1791                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1792                                                 0);
1793                }
1794                break;
1795        case TCP_CORK:
1796                if (sk->sk_state != SMC_INIT &&
1797                    sk->sk_state != SMC_LISTEN &&
1798                    sk->sk_state != SMC_CLOSED) {
1799                        if (!val)
1800                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1801                                                 0);
1802                }
1803                break;
1804        case TCP_DEFER_ACCEPT:
1805                smc->sockopt_defer_accept = val;
1806                break;
1807        default:
1808                break;
1809        }
1810out:
1811        release_sock(sk);
1812
1813        return rc;
1814}
1815
1816static int smc_getsockopt(struct socket *sock, int level, int optname,
1817                          char __user *optval, int __user *optlen)
1818{
1819        struct smc_sock *smc;
1820
1821        smc = smc_sk(sock->sk);
1822        /* socket options apply to the CLC socket */
1823        if (unlikely(!smc->clcsock->ops->getsockopt))
1824                return -EOPNOTSUPP;
1825        return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1826                                             optval, optlen);
1827}
1828
1829static int smc_ioctl(struct socket *sock, unsigned int cmd,
1830                     unsigned long arg)
1831{
1832        union smc_host_cursor cons, urg;
1833        struct smc_connection *conn;
1834        struct smc_sock *smc;
1835        int answ;
1836
1837        smc = smc_sk(sock->sk);
1838        conn = &smc->conn;
1839        lock_sock(&smc->sk);
1840        if (smc->use_fallback) {
1841                if (!smc->clcsock) {
1842                        release_sock(&smc->sk);
1843                        return -EBADF;
1844                }
1845                answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1846                release_sock(&smc->sk);
1847                return answ;
1848        }
1849        switch (cmd) {
1850        case SIOCINQ: /* same as FIONREAD */
1851                if (smc->sk.sk_state == SMC_LISTEN) {
1852                        release_sock(&smc->sk);
1853                        return -EINVAL;
1854                }
1855                if (smc->sk.sk_state == SMC_INIT ||
1856                    smc->sk.sk_state == SMC_CLOSED)
1857                        answ = 0;
1858                else
1859                        answ = atomic_read(&smc->conn.bytes_to_rcv);
1860                break;
1861        case SIOCOUTQ:
1862                /* output queue size (not send + not acked) */
1863                if (smc->sk.sk_state == SMC_LISTEN) {
1864                        release_sock(&smc->sk);
1865                        return -EINVAL;
1866                }
1867                if (smc->sk.sk_state == SMC_INIT ||
1868                    smc->sk.sk_state == SMC_CLOSED)
1869                        answ = 0;
1870                else
1871                        answ = smc->conn.sndbuf_desc->len -
1872                                        atomic_read(&smc->conn.sndbuf_space);
1873                break;
1874        case SIOCOUTQNSD:
1875                /* output queue size (not send only) */
1876                if (smc->sk.sk_state == SMC_LISTEN) {
1877                        release_sock(&smc->sk);
1878                        return -EINVAL;
1879                }
1880                if (smc->sk.sk_state == SMC_INIT ||
1881                    smc->sk.sk_state == SMC_CLOSED)
1882                        answ = 0;
1883                else
1884                        answ = smc_tx_prepared_sends(&smc->conn);
1885                break;
1886        case SIOCATMARK:
1887                if (smc->sk.sk_state == SMC_LISTEN) {
1888                        release_sock(&smc->sk);
1889                        return -EINVAL;
1890                }
1891                if (smc->sk.sk_state == SMC_INIT ||
1892                    smc->sk.sk_state == SMC_CLOSED) {
1893                        answ = 0;
1894                } else {
1895                        smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1896                        smc_curs_copy(&urg, &conn->urg_curs, conn);
1897                        answ = smc_curs_diff(conn->rmb_desc->len,
1898                                             &cons, &urg) == 1;
1899                }
1900                break;
1901        default:
1902                release_sock(&smc->sk);
1903                return -ENOIOCTLCMD;
1904        }
1905        release_sock(&smc->sk);
1906
1907        return put_user(answ, (int __user *)arg);
1908}
1909
1910static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1911                            int offset, size_t size, int flags)
1912{
1913        struct sock *sk = sock->sk;
1914        struct smc_sock *smc;
1915        int rc = -EPIPE;
1916
1917        smc = smc_sk(sk);
1918        lock_sock(sk);
1919        if (sk->sk_state != SMC_ACTIVE) {
1920                release_sock(sk);
1921                goto out;
1922        }
1923        release_sock(sk);
1924        if (smc->use_fallback)
1925                rc = kernel_sendpage(smc->clcsock, page, offset,
1926                                     size, flags);
1927        else
1928                rc = sock_no_sendpage(sock, page, offset, size, flags);
1929
1930out:
1931        return rc;
1932}
1933
1934/* Map the affected portions of the rmbe into an spd, note the number of bytes
1935 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
1936 * updates till whenever a respective page has been fully processed.
1937 * Note that subsequent recv() calls have to wait till all splice() processing
1938 * completed.
1939 */
1940static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1941                               struct pipe_inode_info *pipe, size_t len,
1942                               unsigned int flags)
1943{
1944        struct sock *sk = sock->sk;
1945        struct smc_sock *smc;
1946        int rc = -ENOTCONN;
1947
1948        smc = smc_sk(sk);
1949        lock_sock(sk);
1950        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1951                /* socket was connected before, no more data to read */
1952                rc = 0;
1953                goto out;
1954        }
1955        if (sk->sk_state == SMC_INIT ||
1956            sk->sk_state == SMC_LISTEN ||
1957            sk->sk_state == SMC_CLOSED)
1958                goto out;
1959
1960        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1961                rc = 0;
1962                goto out;
1963        }
1964
1965        if (smc->use_fallback) {
1966                rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1967                                                    pipe, len, flags);
1968        } else {
1969                if (*ppos) {
1970                        rc = -ESPIPE;
1971                        goto out;
1972                }
1973                if (flags & SPLICE_F_NONBLOCK)
1974                        flags = MSG_DONTWAIT;
1975                else
1976                        flags = 0;
1977                rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1978        }
1979out:
1980        release_sock(sk);
1981
1982        return rc;
1983}
1984
1985/* must look like tcp */
1986static const struct proto_ops smc_sock_ops = {
1987        .family         = PF_SMC,
1988        .owner          = THIS_MODULE,
1989        .release        = smc_release,
1990        .bind           = smc_bind,
1991        .connect        = smc_connect,
1992        .socketpair     = sock_no_socketpair,
1993        .accept         = smc_accept,
1994        .getname        = smc_getname,
1995        .poll           = smc_poll,
1996        .ioctl          = smc_ioctl,
1997        .listen         = smc_listen,
1998        .shutdown       = smc_shutdown,
1999        .setsockopt     = smc_setsockopt,
2000        .getsockopt     = smc_getsockopt,
2001        .sendmsg        = smc_sendmsg,
2002        .recvmsg        = smc_recvmsg,
2003        .mmap           = sock_no_mmap,
2004        .sendpage       = smc_sendpage,
2005        .splice_read    = smc_splice_read,
2006};
2007
2008static int smc_create(struct net *net, struct socket *sock, int protocol,
2009                      int kern)
2010{
2011        int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
2012        struct smc_sock *smc;
2013        struct sock *sk;
2014        int rc;
2015
2016        rc = -ESOCKTNOSUPPORT;
2017        if (sock->type != SOCK_STREAM)
2018                goto out;
2019
2020        rc = -EPROTONOSUPPORT;
2021        if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
2022                goto out;
2023
2024        rc = -ENOBUFS;
2025        sock->ops = &smc_sock_ops;
2026        sk = smc_sock_alloc(net, sock, protocol);
2027        if (!sk)
2028                goto out;
2029
2030        /* create internal TCP socket for CLC handshake and fallback */
2031        smc = smc_sk(sk);
2032        smc->use_fallback = false; /* assume rdma capability first */
2033        smc->fallback_rsn = 0;
2034        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
2035                              &smc->clcsock);
2036        if (rc) {
2037                sk_common_release(sk);
2038                goto out;
2039        }
2040        smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
2041        smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
2042
2043out:
2044        return rc;
2045}
2046
2047static const struct net_proto_family smc_sock_family_ops = {
2048        .family = PF_SMC,
2049        .owner  = THIS_MODULE,
2050        .create = smc_create,
2051};
2052
2053unsigned int smc_net_id;
2054
2055static __net_init int smc_net_init(struct net *net)
2056{
2057        return smc_pnet_net_init(net);
2058}
2059
2060static void __net_exit smc_net_exit(struct net *net)
2061{
2062        smc_pnet_net_exit(net);
2063}
2064
2065static struct pernet_operations smc_net_ops = {
2066        .init = smc_net_init,
2067        .exit = smc_net_exit,
2068        .id   = &smc_net_id,
2069        .size = sizeof(struct smc_net),
2070};
2071
2072static int __init smc_init(void)
2073{
2074        int rc;
2075
2076        rc = register_pernet_subsys(&smc_net_ops);
2077        if (rc)
2078                return rc;
2079
2080        rc = smc_pnet_init();
2081        if (rc)
2082                goto out_pernet_subsys;
2083
2084        rc = smc_core_init();
2085        if (rc) {
2086                pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
2087                goto out_pnet;
2088        }
2089
2090        rc = smc_llc_init();
2091        if (rc) {
2092                pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
2093                goto out_core;
2094        }
2095
2096        rc = smc_cdc_init();
2097        if (rc) {
2098                pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
2099                goto out_core;
2100        }
2101
2102        rc = proto_register(&smc_proto, 1);
2103        if (rc) {
2104                pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
2105                goto out_core;
2106        }
2107
2108        rc = proto_register(&smc_proto6, 1);
2109        if (rc) {
2110                pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
2111                goto out_proto;
2112        }
2113
2114        rc = sock_register(&smc_sock_family_ops);
2115        if (rc) {
2116                pr_err("%s: sock_register fails with %d\n", __func__, rc);
2117                goto out_proto6;
2118        }
2119        INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
2120        INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
2121
2122        rc = smc_ib_register_client();
2123        if (rc) {
2124                pr_err("%s: ib_register fails with %d\n", __func__, rc);
2125                goto out_sock;
2126        }
2127
2128        static_branch_enable(&tcp_have_smc);
2129        return 0;
2130
2131out_sock:
2132        sock_unregister(PF_SMC);
2133out_proto6:
2134        proto_unregister(&smc_proto6);
2135out_proto:
2136        proto_unregister(&smc_proto);
2137out_core:
2138        smc_core_exit();
2139out_pnet:
2140        smc_pnet_exit();
2141out_pernet_subsys:
2142        unregister_pernet_subsys(&smc_net_ops);
2143
2144        return rc;
2145}
2146
2147static void __exit smc_exit(void)
2148{
2149        static_branch_disable(&tcp_have_smc);
2150        sock_unregister(PF_SMC);
2151        smc_core_exit();
2152        smc_ib_unregister_client();
2153        proto_unregister(&smc_proto6);
2154        proto_unregister(&smc_proto);
2155        smc_pnet_exit();
2156        unregister_pernet_subsys(&smc_net_ops);
2157        rcu_barrier();
2158}
2159
2160module_init(smc_init);
2161module_exit(smc_exit);
2162
2163MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
2164MODULE_DESCRIPTION("smc socket address family");
2165MODULE_LICENSE("GPL");
2166MODULE_ALIAS_NETPROTO(PF_SMC);
2167