linux/net/smc/af_smc.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
   6 *  applies to SOCK_STREAM sockets only
   7 *  offers an alternative communication option for TCP-protocol sockets
   8 *  applicable with RoCE-cards only
   9 *
  10 *  Initial restrictions:
  11 *    - support for alternate links postponed
  12 *
  13 *  Copyright IBM Corp. 2016, 2018
  14 *
  15 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  16 *              based on prototype from Frank Blaschka
  17 */
  18
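/*
 * Usage note: user space selects SMC by creating the socket with the AF_SMC
 * family, e.g.
 *	sd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 * (SMCPROTO_SMC6 selects the IPv6 variant). The handlers below perform the
 * CLC handshake over an internal TCP socket ("clcsock") and transparently
 * fall back to plain TCP whenever SMC cannot be used.
 */
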
  19#define KMSG_COMPONENT "smc"
  20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  21
  22#include <linux/module.h>
  23#include <linux/socket.h>
  24#include <linux/workqueue.h>
  25#include <linux/in.h>
  26#include <linux/sched/signal.h>
  27#include <linux/if_vlan.h>
  28
  29#include <net/sock.h>
  30#include <net/tcp.h>
  31#include <net/smc.h>
  32#include <asm/ioctls.h>
  33
  34#include <net/net_namespace.h>
  35#include <net/netns/generic.h>
  36#include "smc_netns.h"
  37
  38#include "smc.h"
  39#include "smc_clc.h"
  40#include "smc_llc.h"
  41#include "smc_cdc.h"
  42#include "smc_core.h"
  43#include "smc_ib.h"
  44#include "smc_ism.h"
  45#include "smc_pnet.h"
  46#include "smc_tx.h"
  47#include "smc_rx.h"
  48#include "smc_close.h"
  49
  50static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link group
  51                                                 * creation on server
  52                                                 */
  53static DEFINE_MUTEX(smc_client_lgr_pending);    /* serialize link group
  54                                                 * creation on client
  55                                                 */
  56
  57static void smc_tcp_listen_work(struct work_struct *);
  58static void smc_connect_work(struct work_struct *);
  59
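/* forward a SO_KEEPALIVE request to the internal CLC/TCP socket */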
  60static void smc_set_keepalive(struct sock *sk, int val)
  61{
  62        struct smc_sock *smc = smc_sk(sk);
  63
  64        smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
  65}
  66
  67static struct smc_hashinfo smc_v4_hashinfo = {
  68        .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
  69};
  70
  71static struct smc_hashinfo smc_v6_hashinfo = {
  72        .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
  73};
  74
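/* add an smc sock to the protocol's hash list and account it as in use;
 * smc_unhash_sk() is the inverse operation
 */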
  75int smc_hash_sk(struct sock *sk)
  76{
  77        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  78        struct hlist_head *head;
  79
  80        head = &h->ht;
  81
  82        write_lock_bh(&h->lock);
  83        sk_add_node(sk, head);
  84        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  85        write_unlock_bh(&h->lock);
  86
  87        return 0;
  88}
  89EXPORT_SYMBOL_GPL(smc_hash_sk);
  90
  91void smc_unhash_sk(struct sock *sk)
  92{
  93        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  94
  95        write_lock_bh(&h->lock);
  96        if (sk_del_node_init(sk))
  97                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  98        write_unlock_bh(&h->lock);
  99}
 100EXPORT_SYMBOL_GPL(smc_unhash_sk);
 101
 102struct proto smc_proto = {
 103        .name           = "SMC",
 104        .owner          = THIS_MODULE,
 105        .keepalive      = smc_set_keepalive,
 106        .hash           = smc_hash_sk,
 107        .unhash         = smc_unhash_sk,
 108        .obj_size       = sizeof(struct smc_sock),
 109        .h.smc_hash     = &smc_v4_hashinfo,
 110        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 111};
 112EXPORT_SYMBOL_GPL(smc_proto);
 113
 114struct proto smc_proto6 = {
 115        .name           = "SMC6",
 116        .owner          = THIS_MODULE,
 117        .keepalive      = smc_set_keepalive,
 118        .hash           = smc_hash_sk,
 119        .unhash         = smc_unhash_sk,
 120        .obj_size       = sizeof(struct smc_sock),
 121        .h.smc_hash     = &smc_v6_hashinfo,
 122        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 123};
 124EXPORT_SYMBOL_GPL(smc_proto6);
 125
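/* release an smc sock: close the SMC connection actively, or in fallback
 * mode just mark the sock closed; unhash the sock and, once it has reached
 * SMC_CLOSED, release the clcsock and free the connection.
 * Called with the sock lock held.
 */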
 126static int __smc_release(struct smc_sock *smc)
 127{
 128        struct sock *sk = &smc->sk;
 129        int rc = 0;
 130
 131        if (!smc->use_fallback) {
 132                rc = smc_close_active(smc);
 133                sock_set_flag(sk, SOCK_DEAD);
 134                sk->sk_shutdown |= SHUTDOWN_MASK;
 135        } else {
 136                if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
 137                        sock_put(sk); /* passive closing */
 138                if (sk->sk_state == SMC_LISTEN) {
 139                        /* wake up clcsock accept */
 140                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
 141                }
 142                sk->sk_state = SMC_CLOSED;
 143                sk->sk_state_change(sk);
 144        }
 145
 146        sk->sk_prot->unhash(sk);
 147
 148        if (sk->sk_state == SMC_CLOSED) {
 149                if (smc->clcsock) {
 150                        release_sock(sk);
 151                        smc_clcsock_release(smc);
 152                        lock_sock(sk);
 153                }
 154                if (!smc->use_fallback)
 155                        smc_conn_free(&smc->conn);
 156        }
 157
 158        return rc;
 159}
 160
 161static int smc_release(struct socket *sock)
 162{
 163        struct sock *sk = sock->sk;
 164        struct smc_sock *smc;
 165        int rc = 0;
 166
 167        if (!sk)
 168                goto out;
 169
 170        smc = smc_sk(sk);
 171
 172        /* cleanup for a dangling non-blocking connect */
 173        if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
 174                tcp_abort(smc->clcsock->sk, ECONNABORTED);
 175        flush_work(&smc->connect_work);
 176
 177        if (sk->sk_state == SMC_LISTEN)
 178                /* smc_close_non_accepted() is called and acquires
 179                 * sock lock for child sockets again
 180                 */
 181                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 182        else
 183                lock_sock(sk);
 184
 185        rc = __smc_release(smc);
 186
 187        /* detach socket */
 188        sock_orphan(sk);
 189        sock->sk = NULL;
 190        release_sock(sk);
 191
 192        sock_put(sk); /* final sock_put */
 193out:
 194        return rc;
 195}
 196
 197static void smc_destruct(struct sock *sk)
 198{
 199        if (sk->sk_state != SMC_CLOSED)
 200                return;
 201        if (!sock_flag(sk, SOCK_DEAD))
 202                return;
 203
 204        sk_refcnt_debug_dec(sk);
 205}
 206
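/* allocate and initialize a new smc sock in SMC_INIT state, including its
 * work items, accept queue and hash list entry
 */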
 207static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 208                                   int protocol)
 209{
 210        struct smc_sock *smc;
 211        struct proto *prot;
 212        struct sock *sk;
 213
 214        prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
 215        sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
 216        if (!sk)
 217                return NULL;
 218
 219        sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
 220        sk->sk_state = SMC_INIT;
 221        sk->sk_destruct = smc_destruct;
 222        sk->sk_protocol = protocol;
 223        smc = smc_sk(sk);
 224        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 225        INIT_WORK(&smc->connect_work, smc_connect_work);
 226        INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 227        INIT_LIST_HEAD(&smc->accept_q);
 228        spin_lock_init(&smc->accept_q_lock);
 229        spin_lock_init(&smc->conn.send_lock);
 230        sk->sk_prot->hash(sk);
 231        sk_refcnt_debug_inc(sk);
 232        mutex_init(&smc->clcsock_release_lock);
 233
 234        return sk;
 235}
 236
 237static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
 238                    int addr_len)
 239{
 240        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 241        struct sock *sk = sock->sk;
 242        struct smc_sock *smc;
 243        int rc;
 244
 245        smc = smc_sk(sk);
 246
 247        /* replicate tests from inet_bind(), to be safe wrt. future changes */
 248        rc = -EINVAL;
 249        if (addr_len < sizeof(struct sockaddr_in))
 250                goto out;
 251
 252        rc = -EAFNOSUPPORT;
 253        if (addr->sin_family != AF_INET &&
 254            addr->sin_family != AF_INET6 &&
 255            addr->sin_family != AF_UNSPEC)
 256                goto out;
 257        /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
 258        if (addr->sin_family == AF_UNSPEC &&
 259            addr->sin_addr.s_addr != htonl(INADDR_ANY))
 260                goto out;
 261
 262        lock_sock(sk);
 263
 264        /* Check if socket is already active */
 265        rc = -EINVAL;
 266        if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
 267                goto out_rel;
 268
 269        smc->clcsock->sk->sk_reuse = sk->sk_reuse;
 270        rc = kernel_bind(smc->clcsock, uaddr, addr_len);
 271
 272out_rel:
 273        release_sock(sk);
 274out:
 275        return rc;
 276}
 277
 278static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 279                                   unsigned long mask)
 280{
  281        /* options we don't get control of via setsockopt */
 282        nsk->sk_type = osk->sk_type;
 283        nsk->sk_sndbuf = osk->sk_sndbuf;
 284        nsk->sk_rcvbuf = osk->sk_rcvbuf;
 285        nsk->sk_sndtimeo = osk->sk_sndtimeo;
 286        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
 287        nsk->sk_mark = osk->sk_mark;
 288        nsk->sk_priority = osk->sk_priority;
 289        nsk->sk_rcvlowat = osk->sk_rcvlowat;
 290        nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
 291        nsk->sk_err = osk->sk_err;
 292
 293        nsk->sk_flags &= ~mask;
 294        nsk->sk_flags |= osk->sk_flags & mask;
 295}
 296
 297#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
 298                             (1UL << SOCK_KEEPOPEN) | \
 299                             (1UL << SOCK_LINGER) | \
 300                             (1UL << SOCK_BROADCAST) | \
 301                             (1UL << SOCK_TIMESTAMP) | \
 302                             (1UL << SOCK_DBG) | \
 303                             (1UL << SOCK_RCVTSTAMP) | \
 304                             (1UL << SOCK_RCVTSTAMPNS) | \
 305                             (1UL << SOCK_LOCALROUTE) | \
 306                             (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
 307                             (1UL << SOCK_RXQ_OVFL) | \
 308                             (1UL << SOCK_WIFI_STATUS) | \
 309                             (1UL << SOCK_NOFCS) | \
 310                             (1UL << SOCK_FILTER_LOCKED) | \
 311                             (1UL << SOCK_TSTAMP_NEW))
 312/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 313 * clc socket (since smc is not called for these options from net/core)
 314 */
 315static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 316{
 317        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
 318}
 319
 320#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
 321                             (1UL << SOCK_KEEPOPEN) | \
 322                             (1UL << SOCK_LINGER) | \
 323                             (1UL << SOCK_DBG))
 324/* copy only settings and flags relevant for smc from clc to smc socket */
 325static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 326{
 327        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 328}
 329
 330/* register a new rmb, send confirm_rkey msg to register with peer */
 331static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
 332                       bool conf_rkey)
 333{
 334        if (!rmb_desc->wr_reg) {
 335                /* register memory region for new rmb */
 336                if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
 337                        rmb_desc->regerr = 1;
 338                        return -EFAULT;
 339                }
 340                rmb_desc->wr_reg = 1;
 341        }
 342        if (!conf_rkey)
 343                return 0;
 344        /* exchange confirm_rkey msg with peer */
 345        if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
 346                rmb_desc->regerr = 1;
 347                return -EFAULT;
 348        }
 349        return 0;
 350}
 351
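/* client: LLC handshake for the first link of a new link group - wait for
 * the server's CONFIRM LINK, bring the QP to RTS, register the RMB, send the
 * CONFIRM LINK response and answer the subsequent ADD LINK request
 * (only a single link is supported so far)
 */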
 352static int smc_clnt_conf_first_link(struct smc_sock *smc)
 353{
 354        struct net *net = sock_net(smc->clcsock->sk);
 355        struct smc_link_group *lgr = smc->conn.lgr;
 356        struct smc_link *link;
 357        int rest;
 358        int rc;
 359
 360        link = &lgr->lnk[SMC_SINGLE_LINK];
 361        /* receive CONFIRM LINK request from server over RoCE fabric */
 362        rest = wait_for_completion_interruptible_timeout(
 363                &link->llc_confirm,
 364                SMC_LLC_WAIT_FIRST_TIME);
 365        if (rest <= 0) {
 366                struct smc_clc_msg_decline dclc;
 367
 368                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 369                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 370                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 371        }
 372
 373        if (link->llc_confirm_rc)
 374                return SMC_CLC_DECL_RMBE_EC;
 375
 376        rc = smc_ib_modify_qp_rts(link);
 377        if (rc)
 378                return SMC_CLC_DECL_ERR_RDYLNK;
 379
 380        smc_wr_remember_qp_attr(link);
 381
 382        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
 383                return SMC_CLC_DECL_ERR_REGRMB;
 384
 385        /* send CONFIRM LINK response over RoCE fabric */
 386        rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 387        if (rc < 0)
 388                return SMC_CLC_DECL_TIMEOUT_CL;
 389
 390        /* receive ADD LINK request from server over RoCE fabric */
 391        rest = wait_for_completion_interruptible_timeout(&link->llc_add,
 392                                                         SMC_LLC_WAIT_TIME);
 393        if (rest <= 0) {
 394                struct smc_clc_msg_decline dclc;
 395
 396                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 397                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 398                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 399        }
 400
 401        /* send add link reject message, only one link supported for now */
 402        rc = smc_llc_send_add_link(link,
 403                                   link->smcibdev->mac[link->ibport - 1],
 404                                   link->gid, SMC_LLC_RESP);
 405        if (rc < 0)
 406                return SMC_CLC_DECL_TIMEOUT_AL;
 407
 408        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 409
 410        return 0;
 411}
 412
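/* save SMC-R specific connection parameters of the peer (RMBE index, alert
 * token and RMBE size) from the received CLC accept/confirm message;
 * smcd_conn_save_peer_info() below does the same for SMC-D
 */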
 413static void smcr_conn_save_peer_info(struct smc_sock *smc,
 414                                     struct smc_clc_msg_accept_confirm *clc)
 415{
 416        int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 417
 418        smc->conn.peer_rmbe_idx = clc->rmbe_idx;
 419        smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
 420        smc->conn.peer_rmbe_size = bufsize;
 421        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 422        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 423}
 424
 425static void smcd_conn_save_peer_info(struct smc_sock *smc,
 426                                     struct smc_clc_msg_accept_confirm *clc)
 427{
 428        int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
 429
 430        smc->conn.peer_rmbe_idx = clc->dmbe_idx;
 431        smc->conn.peer_token = clc->token;
 432        /* msg header takes up space in the buffer */
 433        smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
 434        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 435        smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
 436}
 437
 438static void smc_conn_save_peer_info(struct smc_sock *smc,
 439                                    struct smc_clc_msg_accept_confirm *clc)
 440{
 441        if (smc->conn.lgr->is_smcd)
 442                smcd_conn_save_peer_info(smc, clc);
 443        else
 444                smcr_conn_save_peer_info(smc, clc);
 445}
 446
 447static void smc_link_save_peer_info(struct smc_link *link,
 448                                    struct smc_clc_msg_accept_confirm *clc)
 449{
 450        link->peer_qpn = ntoh24(clc->qpn);
 451        memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
 452        memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
 453        link->peer_psn = ntoh24(clc->psn);
 454        link->peer_mtu = clc->qp_mtu;
 455}
 456
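/* switch the sock to TCP fallback mode: redirect the socket file to the
 * internal clcsock so that subsequent calls operate on the TCP socket
 */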
 457static void smc_switch_to_fallback(struct smc_sock *smc)
 458{
 459        smc->use_fallback = true;
 460        if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
 461                smc->clcsock->file = smc->sk.sk_socket->file;
 462                smc->clcsock->file->private_data = smc->clcsock;
 463        }
 464}
 465
 466/* fall back during connect */
 467static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 468{
 469        smc_switch_to_fallback(smc);
 470        smc->fallback_rsn = reason_code;
 471        smc_copy_sock_settings_to_clc(smc);
 472        smc->connect_nonblock = 0;
 473        if (smc->sk.sk_state == SMC_INIT)
 474                smc->sk.sk_state = SMC_ACTIVE;
 475        return 0;
 476}
 477
 478/* decline and fall back during connect */
 479static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 480{
 481        int rc;
 482
 483        if (reason_code < 0) { /* error, fallback is not possible */
 484                if (smc->sk.sk_state == SMC_INIT)
 485                        sock_put(&smc->sk); /* passive closing */
 486                return reason_code;
 487        }
 488        if (reason_code != SMC_CLC_DECL_PEERDECL) {
 489                rc = smc_clc_send_decline(smc, reason_code);
 490                if (rc < 0) {
 491                        if (smc->sk.sk_state == SMC_INIT)
 492                                sock_put(&smc->sk); /* passive closing */
 493                        return rc;
 494                }
 495        }
 496        return smc_connect_fallback(smc, reason_code);
 497}
 498
 499/* abort connecting */
 500static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 501                             int local_contact)
 502{
 503        if (local_contact == SMC_FIRST_CONTACT)
 504                smc_lgr_forget(smc->conn.lgr);
 505        if (smc->conn.lgr->is_smcd)
 506                /* there is only one lgr role for SMC-D; use server lock */
 507                mutex_unlock(&smc_server_lgr_pending);
 508        else
 509                mutex_unlock(&smc_client_lgr_pending);
 510
 511        smc_conn_free(&smc->conn);
 512        smc->connect_nonblock = 0;
 513        return reason_code;
 514}
 515
 516/* check if there is a rdma device available for this connection. */
 517/* called for connect and listen */
 518static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
 519{
 520        /* PNET table look up: search active ib_device and port
 521         * within same PNETID that also contains the ethernet device
 522         * used for the internal TCP socket
 523         */
 524        smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
 525        if (!ini->ib_dev)
 526                return SMC_CLC_DECL_NOSMCRDEV;
 527        return 0;
 528}
 529
 530/* check if there is an ISM device available for this connection. */
 531/* called for connect and listen */
 532static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
 533{
 534        /* Find ISM device with same PNETID as connecting interface  */
 535        smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
 536        if (!ini->ism_dev)
 537                return SMC_CLC_DECL_NOSMCDDEV;
 538        return 0;
 539}
 540
 541/* Check for VLAN ID and register it on ISM device just for CLC handshake */
 542static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
 543                                      struct smc_init_info *ini)
 544{
 545        if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
 546                return SMC_CLC_DECL_ISMVLANERR;
 547        return 0;
 548}
 549
 550/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 551 * used, the VLAN ID will be registered again during the connection setup.
 552 */
 553static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
 554                                        struct smc_init_info *ini)
 555{
 556        if (!is_smcd)
 557                return 0;
 558        if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
 559                return SMC_CLC_DECL_CNFERR;
 560        return 0;
 561}
 562
 563/* CLC handshake during connect */
 564static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 565                           struct smc_clc_msg_accept_confirm *aclc,
 566                           struct smc_init_info *ini)
 567{
 568        int rc = 0;
 569
 570        /* do inband token exchange */
 571        rc = smc_clc_send_proposal(smc, smc_type, ini);
 572        if (rc)
 573                return rc;
 574        /* receive SMC Accept CLC message */
 575        return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
 576                                CLC_WAIT_TIME);
 577}
 578
 579/* setup for RDMA connection of client */
 580static int smc_connect_rdma(struct smc_sock *smc,
 581                            struct smc_clc_msg_accept_confirm *aclc,
 582                            struct smc_init_info *ini)
 583{
 584        struct smc_link *link;
 585        int reason_code = 0;
 586
 587        ini->is_smcd = false;
 588        ini->ib_lcl = &aclc->lcl;
 589        ini->ib_clcqpn = ntoh24(aclc->qpn);
 590        ini->srv_first_contact = aclc->hdr.flag;
 591
 592        mutex_lock(&smc_client_lgr_pending);
 593        reason_code = smc_conn_create(smc, ini);
 594        if (reason_code) {
 595                mutex_unlock(&smc_client_lgr_pending);
 596                return reason_code;
 597        }
 598        link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 599
 600        smc_conn_save_peer_info(smc, aclc);
 601
 602        /* create send buffer and rmb */
 603        if (smc_buf_create(smc, false))
 604                return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
 605                                         ini->cln_first_contact);
 606
 607        if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 608                smc_link_save_peer_info(link, aclc);
 609
 610        if (smc_rmb_rtoken_handling(&smc->conn, aclc))
 611                return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 612                                         ini->cln_first_contact);
 613
 614        smc_close_init(smc);
 615        smc_rx_init(smc);
 616
 617        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 618                if (smc_ib_ready_link(link))
 619                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 620                                                 ini->cln_first_contact);
 621        } else {
 622                if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
 623                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 624                                                 ini->cln_first_contact);
 625        }
 626        smc_rmb_sync_sg_for_device(&smc->conn);
 627
 628        reason_code = smc_clc_send_confirm(smc);
 629        if (reason_code)
 630                return smc_connect_abort(smc, reason_code,
 631                                         ini->cln_first_contact);
 632
 633        smc_tx_init(smc);
 634
 635        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 636                /* QP confirmation over RoCE fabric */
 637                reason_code = smc_clnt_conf_first_link(smc);
 638                if (reason_code)
 639                        return smc_connect_abort(smc, reason_code,
 640                                                 ini->cln_first_contact);
 641        }
 642        mutex_unlock(&smc_client_lgr_pending);
 643
 644        smc_copy_sock_settings_to_clc(smc);
 645        smc->connect_nonblock = 0;
 646        if (smc->sk.sk_state == SMC_INIT)
 647                smc->sk.sk_state = SMC_ACTIVE;
 648
 649        return 0;
 650}
 651
 652/* setup for ISM connection of client */
 653static int smc_connect_ism(struct smc_sock *smc,
 654                           struct smc_clc_msg_accept_confirm *aclc,
 655                           struct smc_init_info *ini)
 656{
 657        int rc = 0;
 658
 659        ini->is_smcd = true;
 660        ini->ism_gid = aclc->gid;
 661        ini->srv_first_contact = aclc->hdr.flag;
 662
 663        /* there is only one lgr role for SMC-D; use server lock */
 664        mutex_lock(&smc_server_lgr_pending);
 665        rc = smc_conn_create(smc, ini);
 666        if (rc) {
 667                mutex_unlock(&smc_server_lgr_pending);
 668                return rc;
 669        }
 670
 671        /* Create send and receive buffers */
 672        if (smc_buf_create(smc, true))
 673                return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
 674                                         ini->cln_first_contact);
 675
 676        smc_conn_save_peer_info(smc, aclc);
 677        smc_close_init(smc);
 678        smc_rx_init(smc);
 679        smc_tx_init(smc);
 680
 681        rc = smc_clc_send_confirm(smc);
 682        if (rc)
 683                return smc_connect_abort(smc, rc, ini->cln_first_contact);
 684        mutex_unlock(&smc_server_lgr_pending);
 685
 686        smc_copy_sock_settings_to_clc(smc);
 687        smc->connect_nonblock = 0;
 688        if (smc->sk.sk_state == SMC_INIT)
 689                smc->sk.sk_state = SMC_ACTIVE;
 690
 691        return 0;
 692}
 693
 694/* perform steps before actually connecting */
 695static int __smc_connect(struct smc_sock *smc)
 696{
 697        bool ism_supported = false, rdma_supported = false;
 698        struct smc_clc_msg_accept_confirm aclc;
 699        struct smc_init_info ini = {0};
 700        int smc_type;
 701        int rc = 0;
 702
 703        sock_hold(&smc->sk); /* sock put in passive closing */
 704
 705        if (smc->use_fallback)
 706                return smc_connect_fallback(smc, smc->fallback_rsn);
 707
 708        /* if peer has not signalled SMC-capability, fall back */
 709        if (!tcp_sk(smc->clcsock->sk)->syn_smc)
 710                return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 711
 712        /* IPSec connections opt out of SMC-R optimizations */
 713        if (using_ipsec(smc))
 714                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 715
 716        /* get vlan id from IP device */
 717        if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
 718                return smc_connect_decline_fallback(smc,
 719                                                    SMC_CLC_DECL_GETVLANERR);
 720
 721        /* check if there is an ism device available */
 722        if (!smc_find_ism_device(smc, &ini) &&
 723            !smc_connect_ism_vlan_setup(smc, &ini)) {
 724                /* ISM is supported for this connection */
 725                ism_supported = true;
 726                smc_type = SMC_TYPE_D;
 727        }
 728
 729        /* check if there is a rdma device available */
 730        if (!smc_find_rdma_device(smc, &ini)) {
 731                /* RDMA is supported for this connection */
 732                rdma_supported = true;
 733                if (ism_supported)
 734                        smc_type = SMC_TYPE_B; /* both */
 735                else
 736                        smc_type = SMC_TYPE_R; /* only RDMA */
 737        }
 738
 739        /* if neither ISM nor RDMA are supported, fallback */
 740        if (!rdma_supported && !ism_supported)
 741                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 742
 743        /* perform CLC handshake */
 744        rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
 745        if (rc) {
 746                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 747                return smc_connect_decline_fallback(smc, rc);
 748        }
 749
 750        /* depending on previous steps, connect using rdma or ism */
 751        if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
 752                rc = smc_connect_rdma(smc, &aclc, &ini);
 753        else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
 754                rc = smc_connect_ism(smc, &aclc, &ini);
 755        else
 756                rc = SMC_CLC_DECL_MODEUNSUPP;
 757        if (rc) {
 758                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 759                return smc_connect_decline_fallback(smc, rc);
 760        }
 761
 762        smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 763        return 0;
 764}
 765
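/* worker for a nonblocking connect: wait until the TCP handshake on the
 * clcsock has completed, then perform the SMC handshake via __smc_connect()
 */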
 766static void smc_connect_work(struct work_struct *work)
 767{
 768        struct smc_sock *smc = container_of(work, struct smc_sock,
 769                                            connect_work);
 770        long timeo = smc->sk.sk_sndtimeo;
 771        int rc = 0;
 772
 773        if (!timeo)
 774                timeo = MAX_SCHEDULE_TIMEOUT;
 775        lock_sock(smc->clcsock->sk);
 776        if (smc->clcsock->sk->sk_err) {
 777                smc->sk.sk_err = smc->clcsock->sk->sk_err;
 778        } else if ((1 << smc->clcsock->sk->sk_state) &
  779                                        (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 780                rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
 781                if ((rc == -EPIPE) &&
 782                    ((1 << smc->clcsock->sk->sk_state) &
 783                                        (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
 784                        rc = 0;
 785        }
 786        release_sock(smc->clcsock->sk);
 787        lock_sock(&smc->sk);
 788        if (rc != 0 || smc->sk.sk_err) {
 789                smc->sk.sk_state = SMC_CLOSED;
 790                if (rc == -EPIPE || rc == -EAGAIN)
 791                        smc->sk.sk_err = EPIPE;
 792                else if (signal_pending(current))
 793                        smc->sk.sk_err = -sock_intr_errno(timeo);
 794                goto out;
 795        }
 796
 797        rc = __smc_connect(smc);
 798        if (rc < 0)
 799                smc->sk.sk_err = -rc;
 800
 801out:
 802        if (!sock_flag(&smc->sk, SOCK_DEAD)) {
 803                if (smc->sk.sk_err) {
 804                        smc->sk.sk_state_change(&smc->sk);
 805                } else { /* allow polling before and after fallback decision */
 806                        smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
 807                        smc->sk.sk_write_space(&smc->sk);
 808                }
 809        }
 810        release_sock(&smc->sk);
 811}
 812
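/* connect an smc sock: establish the internal TCP connection first, then run
 * the SMC handshake either inline or, for nonblocking sockets, deferred to
 * smc_connect_work()
 */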
 813static int smc_connect(struct socket *sock, struct sockaddr *addr,
 814                       int alen, int flags)
 815{
 816        struct sock *sk = sock->sk;
 817        struct smc_sock *smc;
 818        int rc = -EINVAL;
 819
 820        smc = smc_sk(sk);
 821
 822        /* separate smc parameter checking to be safe */
 823        if (alen < sizeof(addr->sa_family))
 824                goto out_err;
 825        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 826                goto out_err;
 827
 828        lock_sock(sk);
 829        switch (sk->sk_state) {
 830        default:
 831                goto out;
 832        case SMC_ACTIVE:
 833                rc = -EISCONN;
 834                goto out;
 835        case SMC_INIT:
 836                rc = 0;
 837                break;
 838        }
 839
 840        smc_copy_sock_settings_to_clc(smc);
 841        tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 842        if (smc->connect_nonblock) {
 843                rc = -EALREADY;
 844                goto out;
 845        }
 846        rc = kernel_connect(smc->clcsock, addr, alen, flags);
 847        if (rc && rc != -EINPROGRESS)
 848                goto out;
 849        if (flags & O_NONBLOCK) {
 850                if (schedule_work(&smc->connect_work))
 851                        smc->connect_nonblock = 1;
 852                rc = -EINPROGRESS;
 853        } else {
 854                rc = __smc_connect(smc);
 855                if (rc < 0)
 856                        goto out;
 857                else
 858                        rc = 0; /* success cases including fallback */
 859        }
 860
 861out:
 862        release_sock(sk);
 863out_err:
 864        return rc;
 865}
 866
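/* accept an incoming connection on the internal TCP listen socket and
 * allocate a new smc sock for it; returns with the listen sock locked again
 */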
 867static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
 868{
 869        struct socket *new_clcsock = NULL;
 870        struct sock *lsk = &lsmc->sk;
 871        struct sock *new_sk;
 872        int rc = -EINVAL;
 873
 874        release_sock(lsk);
 875        new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
 876        if (!new_sk) {
 877                rc = -ENOMEM;
 878                lsk->sk_err = ENOMEM;
 879                *new_smc = NULL;
 880                lock_sock(lsk);
 881                goto out;
 882        }
 883        *new_smc = smc_sk(new_sk);
 884
 885        mutex_lock(&lsmc->clcsock_release_lock);
 886        if (lsmc->clcsock)
 887                rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
 888        mutex_unlock(&lsmc->clcsock_release_lock);
 889        lock_sock(lsk);
 890        if  (rc < 0)
 891                lsk->sk_err = -rc;
 892        if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
 893                new_sk->sk_prot->unhash(new_sk);
 894                if (new_clcsock)
 895                        sock_release(new_clcsock);
 896                new_sk->sk_state = SMC_CLOSED;
 897                sock_set_flag(new_sk, SOCK_DEAD);
 898                sock_put(new_sk); /* final */
 899                *new_smc = NULL;
 900                goto out;
 901        }
 902
 903        (*new_smc)->clcsock = new_clcsock;
 904out:
 905        return rc;
 906}
 907
 908/* add a just created sock to the accept queue of the listen sock as
 909 * candidate for a following socket accept call from user space
 910 */
 911static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
 912{
 913        struct smc_sock *par = smc_sk(parent);
 914
  915        sock_hold(sk); /* sock_put in smc_accept_unlink() */
 916        spin_lock(&par->accept_q_lock);
 917        list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
 918        spin_unlock(&par->accept_q_lock);
 919        sk_acceptq_added(parent);
 920}
 921
 922/* remove a socket from the accept queue of its parental listening socket */
 923static void smc_accept_unlink(struct sock *sk)
 924{
 925        struct smc_sock *par = smc_sk(sk)->listen_smc;
 926
 927        spin_lock(&par->accept_q_lock);
 928        list_del_init(&smc_sk(sk)->accept_q);
 929        spin_unlock(&par->accept_q_lock);
 930        sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
 931        sock_put(sk); /* sock_hold in smc_accept_enqueue */
 932}
 933
 934/* remove a sock from the accept queue to bind it to a new socket created
 935 * for a socket accept call from user space
 936 */
 937struct sock *smc_accept_dequeue(struct sock *parent,
 938                                struct socket *new_sock)
 939{
 940        struct smc_sock *isk, *n;
 941        struct sock *new_sk;
 942
 943        list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
 944                new_sk = (struct sock *)isk;
 945
 946                smc_accept_unlink(new_sk);
 947                if (new_sk->sk_state == SMC_CLOSED) {
 948                        new_sk->sk_prot->unhash(new_sk);
 949                        if (isk->clcsock) {
 950                                sock_release(isk->clcsock);
 951                                isk->clcsock = NULL;
 952                        }
 953                        sock_put(new_sk); /* final */
 954                        continue;
 955                }
 956                if (new_sock) {
 957                        sock_graft(new_sk, new_sock);
 958                        if (isk->use_fallback) {
 959                                smc_sk(new_sk)->clcsock->file = new_sock->file;
 960                                isk->clcsock->file->private_data = isk->clcsock;
 961                        }
 962                }
 963                return new_sk;
 964        }
 965        return NULL;
 966}
 967
 968/* clean up for a created but never accepted sock */
 969void smc_close_non_accepted(struct sock *sk)
 970{
 971        struct smc_sock *smc = smc_sk(sk);
 972
 973        lock_sock(sk);
 974        if (!sk->sk_lingertime)
 975                /* wait for peer closing */
 976                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
 977        __smc_release(smc);
 978        release_sock(sk);
 979        sock_put(sk); /* final sock_put */
 980}
 981
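/* server: LLC handshake for the first link of a new link group - register
 * the RMB, send CONFIRM LINK and ADD LINK to the client and wait for the
 * corresponding responses
 */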
 982static int smc_serv_conf_first_link(struct smc_sock *smc)
 983{
 984        struct net *net = sock_net(smc->clcsock->sk);
 985        struct smc_link_group *lgr = smc->conn.lgr;
 986        struct smc_link *link;
 987        int rest;
 988        int rc;
 989
 990        link = &lgr->lnk[SMC_SINGLE_LINK];
 991
 992        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
 993                return SMC_CLC_DECL_ERR_REGRMB;
 994
 995        /* send CONFIRM LINK request to client over the RoCE fabric */
 996        rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
 997        if (rc < 0)
 998                return SMC_CLC_DECL_TIMEOUT_CL;
 999
1000        /* receive CONFIRM LINK response from client over the RoCE fabric */
1001        rest = wait_for_completion_interruptible_timeout(
1002                &link->llc_confirm_resp,
1003                SMC_LLC_WAIT_FIRST_TIME);
1004        if (rest <= 0) {
1005                struct smc_clc_msg_decline dclc;
1006
1007                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1008                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1009                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
1010        }
1011
1012        if (link->llc_confirm_resp_rc)
1013                return SMC_CLC_DECL_RMBE_EC;
1014
1015        /* send ADD LINK request to client over the RoCE fabric */
1016        rc = smc_llc_send_add_link(link,
1017                                   link->smcibdev->mac[link->ibport - 1],
1018                                   link->gid, SMC_LLC_REQ);
1019        if (rc < 0)
1020                return SMC_CLC_DECL_TIMEOUT_AL;
1021
1022        /* receive ADD LINK response from client over the RoCE fabric */
1023        rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
1024                                                         SMC_LLC_WAIT_TIME);
1025        if (rest <= 0) {
1026                struct smc_clc_msg_decline dclc;
1027
1028                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1029                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1030                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
1031        }
1032
1033        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
1034
1035        return 0;
1036}
1037
1038/* listen worker: finish */
1039static void smc_listen_out(struct smc_sock *new_smc)
1040{
1041        struct smc_sock *lsmc = new_smc->listen_smc;
1042        struct sock *newsmcsk = &new_smc->sk;
1043
1044        if (lsmc->sk.sk_state == SMC_LISTEN) {
1045                lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1046                smc_accept_enqueue(&lsmc->sk, newsmcsk);
1047                release_sock(&lsmc->sk);
1048        } else { /* no longer listening */
1049                smc_close_non_accepted(newsmcsk);
1050        }
1051
1052        /* Wake up accept */
1053        lsmc->sk.sk_data_ready(&lsmc->sk);
1054        sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1055}
1056
1057/* listen worker: finish in state connected */
1058static void smc_listen_out_connected(struct smc_sock *new_smc)
1059{
1060        struct sock *newsmcsk = &new_smc->sk;
1061
1062        sk_refcnt_debug_inc(newsmcsk);
1063        if (newsmcsk->sk_state == SMC_INIT)
1064                newsmcsk->sk_state = SMC_ACTIVE;
1065
1066        smc_listen_out(new_smc);
1067}
1068
1069/* listen worker: finish in error state */
1070static void smc_listen_out_err(struct smc_sock *new_smc)
1071{
1072        struct sock *newsmcsk = &new_smc->sk;
1073
1074        if (newsmcsk->sk_state == SMC_INIT)
1075                sock_put(&new_smc->sk); /* passive closing */
1076        newsmcsk->sk_state = SMC_CLOSED;
1077        smc_conn_free(&new_smc->conn);
1078
1079        smc_listen_out(new_smc);
1080}
1081
1082/* listen worker: decline and fall back if possible */
1083static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1084                               int local_contact)
1085{
1086        /* RDMA setup failed, switch back to TCP */
1087        if (local_contact == SMC_FIRST_CONTACT)
1088                smc_lgr_forget(new_smc->conn.lgr);
1089        if (reason_code < 0) { /* error, no fallback possible */
1090                smc_listen_out_err(new_smc);
1091                return;
1092        }
1093        smc_conn_free(&new_smc->conn);
1094        smc_switch_to_fallback(new_smc);
1095        new_smc->fallback_rsn = reason_code;
1096        if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1097                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1098                        smc_listen_out_err(new_smc);
1099                        return;
1100                }
1101        }
1102        smc_listen_out_connected(new_smc);
1103}
1104
1105/* listen worker: check prefixes */
1106static int smc_listen_prfx_check(struct smc_sock *new_smc,
1107                                 struct smc_clc_msg_proposal *pclc)
1108{
1109        struct smc_clc_msg_proposal_prefix *pclc_prfx;
1110        struct socket *newclcsock = new_smc->clcsock;
1111
1112        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1113        if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1114                return SMC_CLC_DECL_DIFFPREFIX;
1115
1116        return 0;
1117}
1118
1119/* listen worker: initialize connection and buffers */
1120static int smc_listen_rdma_init(struct smc_sock *new_smc,
1121                                struct smc_init_info *ini)
1122{
1123        int rc;
1124
1125        /* allocate connection / link group */
1126        rc = smc_conn_create(new_smc, ini);
1127        if (rc)
1128                return rc;
1129
1130        /* create send buffer and rmb */
1131        if (smc_buf_create(new_smc, false))
1132                return SMC_CLC_DECL_MEM;
1133
1134        return 0;
1135}
1136
1137/* listen worker: initialize connection and buffers for SMC-D */
1138static int smc_listen_ism_init(struct smc_sock *new_smc,
1139                               struct smc_clc_msg_proposal *pclc,
1140                               struct smc_init_info *ini)
1141{
1142        struct smc_clc_msg_smcd *pclc_smcd;
1143        int rc;
1144
1145        pclc_smcd = smc_get_clc_msg_smcd(pclc);
1146        ini->ism_gid = pclc_smcd->gid;
1147        rc = smc_conn_create(new_smc, ini);
1148        if (rc)
1149                return rc;
1150
1151        /* Check if peer can be reached via ISM device */
1152        if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1153                            new_smc->conn.lgr->vlan_id,
1154                            new_smc->conn.lgr->smcd)) {
1155                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1156                        smc_lgr_forget(new_smc->conn.lgr);
1157                smc_conn_free(&new_smc->conn);
1158                return SMC_CLC_DECL_SMCDNOTALK;
1159        }
1160
1161        /* Create send and receive buffers */
1162        if (smc_buf_create(new_smc, true)) {
1163                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1164                        smc_lgr_forget(new_smc->conn.lgr);
1165                smc_conn_free(&new_smc->conn);
1166                return SMC_CLC_DECL_MEM;
1167        }
1168
1169        return 0;
1170}
1171
1172/* listen worker: register buffers */
1173static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1174{
1175        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1176
1177        if (local_contact != SMC_FIRST_CONTACT) {
1178                if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
1179                        return SMC_CLC_DECL_ERR_REGRMB;
1180        }
1181        smc_rmb_sync_sg_for_device(&new_smc->conn);
1182
1183        return 0;
1184}
1185
1186/* listen worker: finish RDMA setup */
1187static int smc_listen_rdma_finish(struct smc_sock *new_smc,
1188                                  struct smc_clc_msg_accept_confirm *cclc,
1189                                  int local_contact)
1190{
1191        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1192        int reason_code = 0;
1193
1194        if (local_contact == SMC_FIRST_CONTACT)
1195                smc_link_save_peer_info(link, cclc);
1196
1197        if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
1198                reason_code = SMC_CLC_DECL_ERR_RTOK;
1199                goto decline;
1200        }
1201
1202        if (local_contact == SMC_FIRST_CONTACT) {
1203                if (smc_ib_ready_link(link)) {
1204                        reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1205                        goto decline;
1206                }
1207                /* QP confirmation over RoCE fabric */
1208                reason_code = smc_serv_conf_first_link(new_smc);
1209                if (reason_code)
1210                        goto decline;
1211        }
1212        return 0;
1213
1214decline:
1215        smc_listen_decline(new_smc, reason_code, local_contact);
1216        return reason_code;
1217}
1218
1219/* setup for RDMA connection of server */
1220static void smc_listen_work(struct work_struct *work)
1221{
1222        struct smc_sock *new_smc = container_of(work, struct smc_sock,
1223                                                smc_listen_work);
1224        struct socket *newclcsock = new_smc->clcsock;
1225        struct smc_clc_msg_accept_confirm cclc;
1226        struct smc_clc_msg_proposal *pclc;
1227        struct smc_init_info ini = {0};
1228        bool ism_supported = false;
1229        u8 buf[SMC_CLC_MAX_LEN];
1230        int rc = 0;
1231
1232        if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
1233                return smc_listen_out_err(new_smc);
1234
1235        if (new_smc->use_fallback) {
1236                smc_listen_out_connected(new_smc);
1237                return;
1238        }
1239
1240        /* check if peer is smc capable */
1241        if (!tcp_sk(newclcsock->sk)->syn_smc) {
1242                smc_switch_to_fallback(new_smc);
1243                new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1244                smc_listen_out_connected(new_smc);
1245                return;
1246        }
1247
1248        /* do inband token exchange -
1249         * wait for and receive SMC Proposal CLC message
1250         */
1251        pclc = (struct smc_clc_msg_proposal *)&buf;
1252        rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1253                              SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
1254        if (rc)
1255                goto out_decl;
1256
1257        /* IPSec connections opt out of SMC-R optimizations */
1258        if (using_ipsec(new_smc)) {
1259                rc = SMC_CLC_DECL_IPSEC;
1260                goto out_decl;
1261        }
1262
1263        /* check for matching IP prefix and subnet length */
1264        rc = smc_listen_prfx_check(new_smc, pclc);
1265        if (rc)
1266                goto out_decl;
1267
1268        /* get vlan id from IP device */
1269        if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
1270                rc = SMC_CLC_DECL_GETVLANERR;
1271                goto out_decl;
1272        }
1273
1274        mutex_lock(&smc_server_lgr_pending);
1275        smc_close_init(new_smc);
1276        smc_rx_init(new_smc);
1277        smc_tx_init(new_smc);
1278
1279        /* check if ISM is available */
1280        if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
1281                ini.is_smcd = true; /* prepare ISM check */
1282                rc = smc_find_ism_device(new_smc, &ini);
1283                if (!rc)
1284                        rc = smc_listen_ism_init(new_smc, pclc, &ini);
1285                if (!rc)
1286                        ism_supported = true;
1287                else if (pclc->hdr.path == SMC_TYPE_D)
1288                        goto out_unlock; /* skip RDMA and decline */
1289        }
1290
1291        /* check if RDMA is available */
1292        if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
1293                /* prepare RDMA check */
1294                memset(&ini, 0, sizeof(ini));
1295                ini.is_smcd = false;
1296                ini.ib_lcl = &pclc->lcl;
1297                rc = smc_find_rdma_device(new_smc, &ini);
1298                if (rc) {
1299                        /* no RDMA device found */
1300                        if (pclc->hdr.path == SMC_TYPE_B)
1301                                /* neither ISM nor RDMA device found */
1302                                rc = SMC_CLC_DECL_NOSMCDEV;
1303                        goto out_unlock;
1304                }
1305                rc = smc_listen_rdma_init(new_smc, &ini);
1306                if (rc)
1307                        goto out_unlock;
1308                rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
1309                if (rc)
1310                        goto out_unlock;
1311        }
1312
1313        /* send SMC Accept CLC message */
1314        rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
1315        if (rc)
1316                goto out_unlock;
1317
1318        /* SMC-D does not need this lock any more */
1319        if (ism_supported)
1320                mutex_unlock(&smc_server_lgr_pending);
1321
1322        /* receive SMC Confirm CLC message */
1323        rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1324                              SMC_CLC_CONFIRM, CLC_WAIT_TIME);
1325        if (rc) {
1326                if (!ism_supported)
1327                        goto out_unlock;
1328                goto out_decl;
1329        }
1330
1331        /* finish worker */
1332        if (!ism_supported) {
1333                rc = smc_listen_rdma_finish(new_smc, &cclc,
1334                                            ini.cln_first_contact);
1335                mutex_unlock(&smc_server_lgr_pending);
1336                if (rc)
1337                        return;
1338        }
1339        smc_conn_save_peer_info(new_smc, &cclc);
1340        smc_listen_out_connected(new_smc);
1341        return;
1342
1343out_unlock:
1344        mutex_unlock(&smc_server_lgr_pending);
1345out_decl:
1346        smc_listen_decline(new_smc, rc, ini.cln_first_contact);
1347}
1348
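/* worker for a listening smc sock: accept new TCP connections on the clcsock
 * and schedule smc_listen_work() for each accepted connection
 */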
1349static void smc_tcp_listen_work(struct work_struct *work)
1350{
1351        struct smc_sock *lsmc = container_of(work, struct smc_sock,
1352                                             tcp_listen_work);
1353        struct sock *lsk = &lsmc->sk;
1354        struct smc_sock *new_smc;
1355        int rc = 0;
1356
1357        lock_sock(lsk);
1358        while (lsk->sk_state == SMC_LISTEN) {
1359                rc = smc_clcsock_accept(lsmc, &new_smc);
1360                if (rc)
1361                        goto out;
1362                if (!new_smc)
1363                        continue;
1364
1365                new_smc->listen_smc = lsmc;
1366                new_smc->use_fallback = lsmc->use_fallback;
1367                new_smc->fallback_rsn = lsmc->fallback_rsn;
1368                sock_hold(lsk); /* sock_put in smc_listen_work */
1369                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1370                smc_copy_sock_settings_to_smc(new_smc);
1371                new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
1372                new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
1373                sock_hold(&new_smc->sk); /* sock_put in passive closing */
1374                if (!schedule_work(&new_smc->smc_listen_work))
1375                        sock_put(&new_smc->sk);
1376        }
1377
1378out:
1379        release_sock(lsk);
1380        sock_put(&lsmc->sk); /* sock_hold in smc_listen */
1381}
1382
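/* move an smc sock to SMC_LISTEN state: put the internal clcsock into TCP
 * listen mode and kick off the tcp_listen worker
 */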
1383static int smc_listen(struct socket *sock, int backlog)
1384{
1385        struct sock *sk = sock->sk;
1386        struct smc_sock *smc;
1387        int rc;
1388
1389        smc = smc_sk(sk);
1390        lock_sock(sk);
1391
1392        rc = -EINVAL;
1393        if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
1394            smc->connect_nonblock)
1395                goto out;
1396
1397        rc = 0;
1398        if (sk->sk_state == SMC_LISTEN) {
1399                sk->sk_max_ack_backlog = backlog;
1400                goto out;
1401        }
 1402        /* some socket options are handled in core, so we cannot apply
1403         * them to the clc socket -- copy smc socket options to clc socket
1404         */
1405        smc_copy_sock_settings_to_clc(smc);
1406        if (!smc->use_fallback)
1407                tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1408
1409        rc = kernel_listen(smc->clcsock, backlog);
1410        if (rc)
1411                goto out;
1412        sk->sk_max_ack_backlog = backlog;
1413        sk->sk_ack_backlog = 0;
1414        sk->sk_state = SMC_LISTEN;
 1415        sock_hold(sk); /* matching sock_put in smc_tcp_listen_work() */
1416        if (!schedule_work(&smc->tcp_listen_work))
1417                sock_put(sk);
1418
1419out:
1420        release_sock(sk);
1421        return rc;
1422}
1423
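/* accept a connection from the queue filled by smc_listen_work(); if
 * deferred accept (sockopt_defer_accept) is set, additionally wait for data
 * to arrive on the new sock
 */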
1424static int smc_accept(struct socket *sock, struct socket *new_sock,
1425                      int flags, bool kern)
1426{
1427        struct sock *sk = sock->sk, *nsk;
1428        DECLARE_WAITQUEUE(wait, current);
1429        struct smc_sock *lsmc;
1430        long timeo;
1431        int rc = 0;
1432
1433        lsmc = smc_sk(sk);
1434        sock_hold(sk); /* sock_put below */
1435        lock_sock(sk);
1436
1437        if (lsmc->sk.sk_state != SMC_LISTEN) {
1438                rc = -EINVAL;
1439                release_sock(sk);
1440                goto out;
1441        }
1442
1443        /* Wait for an incoming connection */
1444        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1445        add_wait_queue_exclusive(sk_sleep(sk), &wait);
1446        while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
1447                set_current_state(TASK_INTERRUPTIBLE);
1448                if (!timeo) {
1449                        rc = -EAGAIN;
1450                        break;
1451                }
1452                release_sock(sk);
1453                timeo = schedule_timeout(timeo);
1454                /* wakeup by sk_data_ready in smc_listen_work() */
1455                sched_annotate_sleep();
1456                lock_sock(sk);
1457                if (signal_pending(current)) {
1458                        rc = sock_intr_errno(timeo);
1459                        break;
1460                }
1461        }
1462        set_current_state(TASK_RUNNING);
1463        remove_wait_queue(sk_sleep(sk), &wait);
1464
1465        if (!rc)
1466                rc = sock_error(nsk);
1467        release_sock(sk);
1468        if (rc)
1469                goto out;
1470
1471        if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
1472                /* wait till data arrives on the socket */
1473                timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
1474                                                                MSEC_PER_SEC);
1475                if (smc_sk(nsk)->use_fallback) {
1476                        struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
1477
1478                        lock_sock(clcsk);
1479                        if (skb_queue_empty(&clcsk->sk_receive_queue))
1480                                sk_wait_data(clcsk, &timeo, NULL);
1481                        release_sock(clcsk);
1482                } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
1483                        lock_sock(nsk);
1484                        smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
1485                        release_sock(nsk);
1486                }
1487        }
1488
1489out:
1490        sock_put(sk); /* sock_hold above */
1491        return rc;
1492}
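
/* Editor's note: a minimal user-space sketch of the server-side path that
 * ends up in smc_listen() and smc_accept() above. AF_SMC (43) and the
 * SMCPROTO_* value mirror the kernel's definitions and are repeated here
 * only for illustration; the snippet is not part of this source and is
 * excluded from the build via #if 0.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

#ifndef AF_SMC
#define AF_SMC		43	/* value from <linux/socket.h> */
#endif
#define SMCPROTO_SMC	0	/* IPv4 flavour, see smc_create() below */

static int smc_server_example(unsigned short port)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,	/* SMC keeps AF_INET addressing */
		.sin_port = htons(port),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int lfd, cfd;

	lfd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
	if (lfd < 0)
		return -1;
	if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(lfd, 128) < 0) {		/* handled by smc_listen() */
		close(lfd);
		return -1;
	}
	cfd = accept(lfd, NULL, NULL);		/* handled by smc_accept() */
	close(lfd);
	return cfd;
}
#endif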
1493
1494static int smc_getname(struct socket *sock, struct sockaddr *addr,
1495                       int peer)
1496{
1497        struct smc_sock *smc;
1498
1499        if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1500            (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1501                return -ENOTCONN;
1502
1503        smc = smc_sk(sock->sk);
1504
1505        return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1506}
1507
1508static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1509{
1510        struct sock *sk = sock->sk;
1511        struct smc_sock *smc;
1512        int rc = -EPIPE;
1513
1514        smc = smc_sk(sk);
1515        lock_sock(sk);
1516        if ((sk->sk_state != SMC_ACTIVE) &&
1517            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1518            (sk->sk_state != SMC_INIT))
1519                goto out;
1520
1521        if (msg->msg_flags & MSG_FASTOPEN) {
1522                if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
1523                        smc_switch_to_fallback(smc);
1524                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1525                } else {
1526                        rc = -EINVAL;
1527                        goto out;
1528                }
1529        }
1530
1531        if (smc->use_fallback)
1532                rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1533        else
1534                rc = smc_tx_sendmsg(smc, msg, len);
1535out:
1536        release_sock(sk);
1537        return rc;
1538}
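
/* Editor's note: user-space sketch of the MSG_FASTOPEN branch above. TCP
 * Fast Open is not supported over SMC, so a first send carrying
 * MSG_FASTOPEN on a not yet connected socket switches it to TCP fallback
 * (fallback_rsn = SMC_CLC_DECL_OPTUNSUPP) and is then served by the
 * internal CLC socket. Illustrative only, excluded from the build.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN	0x20000000
#endif

static ssize_t smc_fastopen_send_example(int fd, const struct sockaddr_in *dst,
					 const void *buf, size_t len)
{
	/* connect-and-send in one call; forces smc->use_fallback = true */
	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)dst, sizeof(*dst));
}
#endif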
1539
1540static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1541                       int flags)
1542{
1543        struct sock *sk = sock->sk;
1544        struct smc_sock *smc;
1545        int rc = -ENOTCONN;
1546
1547        smc = smc_sk(sk);
1548        lock_sock(sk);
1549        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1550                /* socket was connected before, no more data to read */
1551                rc = 0;
1552                goto out;
1553        }
1554        if ((sk->sk_state == SMC_INIT) ||
1555            (sk->sk_state == SMC_LISTEN) ||
1556            (sk->sk_state == SMC_CLOSED))
1557                goto out;
1558
1559        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1560                rc = 0;
1561                goto out;
1562        }
1563
1564        if (smc->use_fallback) {
1565                rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1566        } else {
1567                msg->msg_namelen = 0;
1568                rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1569        }
1570
1571out:
1572        release_sock(sk);
1573        return rc;
1574}
1575
1576static __poll_t smc_accept_poll(struct sock *parent)
1577{
1578        struct smc_sock *isk = smc_sk(parent);
1579        __poll_t mask = 0;
1580
1581        spin_lock(&isk->accept_q_lock);
1582        if (!list_empty(&isk->accept_q))
1583                mask = EPOLLIN | EPOLLRDNORM;
1584        spin_unlock(&isk->accept_q_lock);
1585
1586        return mask;
1587}
1588
1589static __poll_t smc_poll(struct file *file, struct socket *sock,
1590                             poll_table *wait)
1591{
1592        struct sock *sk = sock->sk;
1593        struct smc_sock *smc;
1594        __poll_t mask = 0;
1595
1596        if (!sk)
1597                return EPOLLNVAL;
1598
1599        smc = smc_sk(sock->sk);
1600        if (smc->use_fallback) {
1601                /* delegate to CLC child sock */
1602                mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1603                sk->sk_err = smc->clcsock->sk->sk_err;
1604        } else {
1605                if (sk->sk_state != SMC_CLOSED)
1606                        sock_poll_wait(file, sock, wait);
1607                if (sk->sk_err)
1608                        mask |= EPOLLERR;
1609                if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1610                    (sk->sk_state == SMC_CLOSED))
1611                        mask |= EPOLLHUP;
1612                if (sk->sk_state == SMC_LISTEN) {
1613                        /* woken up by sk_data_ready in smc_listen_work() */
1614                        mask |= smc_accept_poll(sk);
1615                } else if (smc->use_fallback) { /* as result of connect_work() */
1616                        mask |= smc->clcsock->ops->poll(file, smc->clcsock,
1617                                                           wait);
1618                        sk->sk_err = smc->clcsock->sk->sk_err;
1619                } else {
1620                        if ((sk->sk_state != SMC_INIT &&
1621                             atomic_read(&smc->conn.sndbuf_space)) ||
1622                            sk->sk_shutdown & SEND_SHUTDOWN) {
1623                                mask |= EPOLLOUT | EPOLLWRNORM;
1624                        } else {
1625                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1626                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1627                        }
1628                        if (atomic_read(&smc->conn.bytes_to_rcv))
1629                                mask |= EPOLLIN | EPOLLRDNORM;
1630                        if (sk->sk_shutdown & RCV_SHUTDOWN)
1631                                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1632                        if (sk->sk_state == SMC_APPCLOSEWAIT1)
1633                                mask |= EPOLLIN;
1634                        if (smc->conn.urg_state == SMC_URG_VALID)
1635                                mask |= EPOLLPRI;
1636                }
1637        }
1638
1639        return mask;
1640}
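
/* Editor's note: user-space view of the mask computed by smc_poll() above.
 * POLLPRI maps to the EPOLLPRI bit set while urgent data is valid
 * (SMC_URG_VALID); POLLIN/POLLOUT follow the receive-data and send-space
 * checks. A sketch only, excluded from the build; "fd" is assumed to be a
 * connected AF_SMC socket.
 */
#if 0
#include <poll.h>

static short smc_poll_example(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT | POLLPRI };

	if (poll(&pfd, 1, 1000) <= 0)	/* 1s timeout, or error */
		return 0;
	/* POLLPRI  - urgent byte pending
	 * POLLIN   - data to read, or peer closed / RCV_SHUTDOWN
	 * POLLOUT  - sndbuf space available (or SEND_SHUTDOWN)
	 */
	return pfd.revents;
}
#endif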
1641
1642static int smc_shutdown(struct socket *sock, int how)
1643{
1644        struct sock *sk = sock->sk;
1645        struct smc_sock *smc;
1646        int rc = -EINVAL;
1647        int rc1 = 0;
1648
1649        smc = smc_sk(sk);
1650
1651        if ((how < SHUT_RD) || (how > SHUT_RDWR))
1652                return rc;
1653
1654        lock_sock(sk);
1655
1656        rc = -ENOTCONN;
1657        if ((sk->sk_state != SMC_ACTIVE) &&
1658            (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1659            (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1660            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1661            (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1662            (sk->sk_state != SMC_APPFINCLOSEWAIT))
1663                goto out;
1664        if (smc->use_fallback) {
1665                rc = kernel_sock_shutdown(smc->clcsock, how);
1666                sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1667                if (sk->sk_shutdown == SHUTDOWN_MASK)
1668                        sk->sk_state = SMC_CLOSED;
1669                goto out;
1670        }
1671        switch (how) {
1672        case SHUT_RDWR:         /* shutdown in both directions */
1673                rc = smc_close_active(smc);
1674                break;
1675        case SHUT_WR:
1676                rc = smc_close_shutdown_write(smc);
1677                break;
1678        case SHUT_RD:
1679                rc = 0;
1680                /* nothing more to do because peer is not involved */
1681                break;
1682        }
1683        if (smc->clcsock)
1684                rc1 = kernel_sock_shutdown(smc->clcsock, how);
1685        /* map sock_shutdown_cmd constants to sk_shutdown value range */
1686        sk->sk_shutdown |= how + 1;
1687
1688out:
1689        release_sock(sk);
1690        return rc ? rc : rc1;
1691}
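
/* Editor's note on the "how + 1" mapping above, spelled out:
 *   SHUT_RD   (0) + 1 -> RCV_SHUTDOWN  (1)
 *   SHUT_WR   (1) + 1 -> SEND_SHUTDOWN (2)
 *   SHUT_RDWR (2) + 1 -> SHUTDOWN_MASK (3)
 * A minimal user-space counterpart, excluded from the build:
 */
#if 0
#include <sys/socket.h>

static int smc_shutdown_example(int fd)
{
	/* stop sending; pending data can still be received */
	return shutdown(fd, SHUT_WR);	/* -> smc_close_shutdown_write() */
}
#endif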
1692
1693static int smc_setsockopt(struct socket *sock, int level, int optname,
1694                          char __user *optval, unsigned int optlen)
1695{
1696        struct sock *sk = sock->sk;
1697        struct smc_sock *smc;
1698        int val, rc;
1699
1700        smc = smc_sk(sk);
1701
1702        /* generic setsockopts reaching us here always apply to the
1703         * CLC socket
1704         */
1705        rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1706                                           optval, optlen);
1707        if (smc->clcsock->sk->sk_err) {
1708                sk->sk_err = smc->clcsock->sk->sk_err;
1709                sk->sk_error_report(sk);
1710        }
1711        if (rc)
1712                return rc;
1713
1714        if (optlen < sizeof(int))
1715                return -EINVAL;
1716        if (get_user(val, (int __user *)optval))
1717                return -EFAULT;
1718
1719        lock_sock(sk);
1720        switch (optname) {
1721        case TCP_ULP:
1722        case TCP_FASTOPEN:
1723        case TCP_FASTOPEN_CONNECT:
1724        case TCP_FASTOPEN_KEY:
1725        case TCP_FASTOPEN_NO_COOKIE:
1726                /* option not supported by SMC */
1727                if (sk->sk_state == SMC_INIT) {
1728                        smc_switch_to_fallback(smc);
1729                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1730                } else {
1731                        if (!smc->use_fallback)
1732                                rc = -EINVAL;
1733                }
1734                break;
1735        case TCP_NODELAY:
1736                if (sk->sk_state != SMC_INIT &&
1737                    sk->sk_state != SMC_LISTEN &&
1738                    sk->sk_state != SMC_CLOSED) {
1739                        if (val && !smc->use_fallback)
1740                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1741                                                 0);
1742                }
1743                break;
1744        case TCP_CORK:
1745                if (sk->sk_state != SMC_INIT &&
1746                    sk->sk_state != SMC_LISTEN &&
1747                    sk->sk_state != SMC_CLOSED) {
1748                        if (!val && !smc->use_fallback)
1749                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1750                                                 0);
1751                }
1752                break;
1753        case TCP_DEFER_ACCEPT:
1754                smc->sockopt_defer_accept = val;
1755                break;
1756        default:
1757                break;
1758        }
1759        release_sock(sk);
1760
1761        return rc;
1762}
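
/* Editor's note: user-space sketch of the setsockopt() handling above.
 * Every option is first forwarded to the internal CLC/TCP socket; the
 * options in the switch statement additionally adjust SMC behaviour
 * (TCP_NODELAY/TCP_CORK kick the tx worker, TCP_DEFER_ACCEPT is stored,
 * TCP_FASTOPEN* force a fallback). Illustrative only, excluded from the
 * build; "fd" is an AF_SMC socket.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int smc_setsockopt_example(int fd)
{
	int one = 1;
	int defer = 5;	/* seconds, kept in smc->sockopt_defer_accept */

	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
		return -1;
	return setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT,
			  &defer, sizeof(defer));
}
#endif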
1763
1764static int smc_getsockopt(struct socket *sock, int level, int optname,
1765                          char __user *optval, int __user *optlen)
1766{
1767        struct smc_sock *smc;
1768
1769        smc = smc_sk(sock->sk);
1770        /* socket options apply to the CLC socket */
1771        return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1772                                             optval, optlen);
1773}
1774
1775static int smc_ioctl(struct socket *sock, unsigned int cmd,
1776                     unsigned long arg)
1777{
1778        union smc_host_cursor cons, urg;
1779        struct smc_connection *conn;
1780        struct smc_sock *smc;
1781        int answ;
1782
1783        smc = smc_sk(sock->sk);
1784        conn = &smc->conn;
1785        lock_sock(&smc->sk);
1786        if (smc->use_fallback) {
1787                if (!smc->clcsock) {
1788                        release_sock(&smc->sk);
1789                        return -EBADF;
1790                }
1791                answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1792                release_sock(&smc->sk);
1793                return answ;
1794        }
1795        switch (cmd) {
1796        case SIOCINQ: /* same as FIONREAD */
1797                if (smc->sk.sk_state == SMC_LISTEN) {
1798                        release_sock(&smc->sk);
1799                        return -EINVAL;
1800                }
1801                if (smc->sk.sk_state == SMC_INIT ||
1802                    smc->sk.sk_state == SMC_CLOSED)
1803                        answ = 0;
1804                else
1805                        answ = atomic_read(&smc->conn.bytes_to_rcv);
1806                break;
1807        case SIOCOUTQ:
1808                /* output queue size (not sent + not acked) */
1809                if (smc->sk.sk_state == SMC_LISTEN) {
1810                        release_sock(&smc->sk);
1811                        return -EINVAL;
1812                }
1813                if (smc->sk.sk_state == SMC_INIT ||
1814                    smc->sk.sk_state == SMC_CLOSED)
1815                        answ = 0;
1816                else
1817                        answ = smc->conn.sndbuf_desc->len -
1818                                        atomic_read(&smc->conn.sndbuf_space);
1819                break;
1820        case SIOCOUTQNSD:
1821                /* output queue size (not sent only) */
1822                if (smc->sk.sk_state == SMC_LISTEN) {
1823                        release_sock(&smc->sk);
1824                        return -EINVAL;
1825                }
1826                if (smc->sk.sk_state == SMC_INIT ||
1827                    smc->sk.sk_state == SMC_CLOSED)
1828                        answ = 0;
1829                else
1830                        answ = smc_tx_prepared_sends(&smc->conn);
1831                break;
1832        case SIOCATMARK:
1833                if (smc->sk.sk_state == SMC_LISTEN) {
1834                        release_sock(&smc->sk);
1835                        return -EINVAL;
1836                }
1837                if (smc->sk.sk_state == SMC_INIT ||
1838                    smc->sk.sk_state == SMC_CLOSED) {
1839                        answ = 0;
1840                } else {
1841                        smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1842                        smc_curs_copy(&urg, &conn->urg_curs, conn);
1843                        answ = smc_curs_diff(conn->rmb_desc->len,
1844                                             &cons, &urg) == 1;
1845                }
1846                break;
1847        default:
1848                release_sock(&smc->sk);
1849                return -ENOIOCTLCMD;
1850        }
1851        release_sock(&smc->sk);
1852
1853        return put_user(answ, (int __user *)arg);
1854}
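
/* Editor's note: user-space sketch of the ioctls handled above. SIOCINQ
 * reports unread bytes in the receive buffer (conn.bytes_to_rcv), SIOCOUTQ
 * the occupied send buffer space. Illustrative only, excluded from the
 * build; "fd" is a connected AF_SMC socket.
 */
#if 0
#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ */

static int smc_ioctl_example(int fd, int *inq, int *outq)
{
	if (ioctl(fd, SIOCINQ, inq) < 0)	/* same as FIONREAD */
		return -1;
	return ioctl(fd, SIOCOUTQ, outq);
}
#endif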
1855
1856static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1857                            int offset, size_t size, int flags)
1858{
1859        struct sock *sk = sock->sk;
1860        struct smc_sock *smc;
1861        int rc = -EPIPE;
1862
1863        smc = smc_sk(sk);
1864        lock_sock(sk);
1865        if (sk->sk_state != SMC_ACTIVE) {
1866                release_sock(sk);
1867                goto out;
1868        }
1869        release_sock(sk);
1870        if (smc->use_fallback)
1871                rc = kernel_sendpage(smc->clcsock, page, offset,
1872                                     size, flags);
1873        else
1874                rc = sock_no_sendpage(sock, page, offset, size, flags);
1875
1876out:
1877        return rc;
1878}
1879
1880/* Map the affected portions of the rmbe into an spd, note the number of bytes
1881 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
1882 * are delayed until the respective page has been fully processed.
1883 * Note that subsequent recv() calls have to wait until all splice() processing
1884 * has completed.
1885 */
1886static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1887                               struct pipe_inode_info *pipe, size_t len,
1888                               unsigned int flags)
1889{
1890        struct sock *sk = sock->sk;
1891        struct smc_sock *smc;
1892        int rc = -ENOTCONN;
1893
1894        smc = smc_sk(sk);
1895        lock_sock(sk);
1896        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1897                /* socket was connected before, no more data to read */
1898                rc = 0;
1899                goto out;
1900        }
1901        if (sk->sk_state == SMC_INIT ||
1902            sk->sk_state == SMC_LISTEN ||
1903            sk->sk_state == SMC_CLOSED)
1904                goto out;
1905
1906        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1907                rc = 0;
1908                goto out;
1909        }
1910
1911        if (smc->use_fallback) {
1912                rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1913                                                    pipe, len, flags);
1914        } else {
1915                if (*ppos) {
1916                        rc = -ESPIPE;
1917                        goto out;
1918                }
1919                if (flags & SPLICE_F_NONBLOCK)
1920                        flags = MSG_DONTWAIT;
1921                else
1922                        flags = 0;
1923                rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1924        }
1925out:
1926        release_sock(sk);
1927
1928        return rc;
1929}
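
/* Editor's note: user-space sketch of the splice() receive path above:
 * data is moved from the SMC socket into a pipe; with SPLICE_F_NONBLOCK
 * the call maps to MSG_DONTWAIT internally. Illustrative only, excluded
 * from the build; "fd" is a connected AF_SMC socket.
 */
#if 0
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static ssize_t smc_splice_example(int fd)
{
	int pipefd[2];
	ssize_t n;

	if (pipe(pipefd) < 0)
		return -1;
	n = splice(fd, NULL, pipefd[1], NULL, 4096, SPLICE_F_NONBLOCK);
	close(pipefd[0]);
	close(pipefd[1]);
	return n;
}
#endif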
1930
1931/* must look like tcp */
1932static const struct proto_ops smc_sock_ops = {
1933        .family         = PF_SMC,
1934        .owner          = THIS_MODULE,
1935        .release        = smc_release,
1936        .bind           = smc_bind,
1937        .connect        = smc_connect,
1938        .socketpair     = sock_no_socketpair,
1939        .accept         = smc_accept,
1940        .getname        = smc_getname,
1941        .poll           = smc_poll,
1942        .ioctl          = smc_ioctl,
1943        .listen         = smc_listen,
1944        .shutdown       = smc_shutdown,
1945        .setsockopt     = smc_setsockopt,
1946        .getsockopt     = smc_getsockopt,
1947        .sendmsg        = smc_sendmsg,
1948        .recvmsg        = smc_recvmsg,
1949        .mmap           = sock_no_mmap,
1950        .sendpage       = smc_sendpage,
1951        .splice_read    = smc_splice_read,
1952};
1953
1954static int smc_create(struct net *net, struct socket *sock, int protocol,
1955                      int kern)
1956{
1957        int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1958        struct smc_sock *smc;
1959        struct sock *sk;
1960        int rc;
1961
1962        rc = -ESOCKTNOSUPPORT;
1963        if (sock->type != SOCK_STREAM)
1964                goto out;
1965
1966        rc = -EPROTONOSUPPORT;
1967        if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1968                goto out;
1969
1970        rc = -ENOBUFS;
1971        sock->ops = &smc_sock_ops;
1972        sk = smc_sock_alloc(net, sock, protocol);
1973        if (!sk)
1974                goto out;
1975
1976        /* create internal TCP socket for CLC handshake and fallback */
1977        smc = smc_sk(sk);
1978        smc->use_fallback = false; /* assume rdma capability first */
1979        smc->fallback_rsn = 0;
1980        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1981                              &smc->clcsock);
1982        if (rc) {
1983                sk_common_release(sk);
1984                goto out;
1985        }
1986        smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1987        smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1988
1989out:
1990        return rc;
1991}
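
/* Editor's note: user-space sketch of creating an AF_SMC socket, which is
 * served by smc_create() above. The protocol constant selects the address
 * family of the internal CLC socket: SMCPROTO_SMC (0) -> AF_INET,
 * SMCPROTO_SMC6 (1) -> AF_INET6. Values are repeated here for illustration
 * only; the snippet is excluded from the build.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef AF_SMC
#define AF_SMC		43	/* value from <linux/socket.h> */
#endif
#define SMCPROTO_SMC	0
#define SMCPROTO_SMC6	1

static int smc_client_example(const char *ip, unsigned short port)
{
	struct sockaddr_in addr = { .sin_family = AF_INET };
	int fd;

	addr.sin_port = htons(port);
	if (inet_pton(AF_INET, ip, &addr.sin_addr) != 1)
		return -1;

	fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
	if (fd < 0)
		return -1;	/* e.g. SMC support not available */
	/* connect() runs the CLC handshake and falls back to plain TCP
	 * on the internal clcsock if the peer cannot do SMC
	 */
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return -1;
	return fd;
}
#endif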
1992
1993static const struct net_proto_family smc_sock_family_ops = {
1994        .family = PF_SMC,
1995        .owner  = THIS_MODULE,
1996        .create = smc_create,
1997};
1998
1999unsigned int smc_net_id;
2000
2001static __net_init int smc_net_init(struct net *net)
2002{
2003        return smc_pnet_net_init(net);
2004}
2005
2006static void __net_exit smc_net_exit(struct net *net)
2007{
2008        smc_pnet_net_exit(net);
2009}
2010
2011static struct pernet_operations smc_net_ops = {
2012        .init = smc_net_init,
2013        .exit = smc_net_exit,
2014        .id   = &smc_net_id,
2015        .size = sizeof(struct smc_net),
2016};
2017
2018static int __init smc_init(void)
2019{
2020        int rc;
2021
2022        rc = register_pernet_subsys(&smc_net_ops);
2023        if (rc)
2024                return rc;
2025
2026        rc = smc_pnet_init();
2027        if (rc)
2028                goto out_pernet_subsys;
2029
2030        rc = smc_llc_init();
2031        if (rc) {
2032                pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
2033                goto out_pnet;
2034        }
2035
2036        rc = smc_cdc_init();
2037        if (rc) {
2038                pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
2039                goto out_pnet;
2040        }
2041
2042        rc = proto_register(&smc_proto, 1);
2043        if (rc) {
2044                pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
2045                goto out_pnet;
2046        }
2047
2048        rc = proto_register(&smc_proto6, 1);
2049        if (rc) {
2050                pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
2051                goto out_proto;
2052        }
2053
2054        rc = sock_register(&smc_sock_family_ops);
2055        if (rc) {
2056                pr_err("%s: sock_register fails with %d\n", __func__, rc);
2057                goto out_proto6;
2058        }
2059        INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
2060        INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
2061
2062        rc = smc_ib_register_client();
2063        if (rc) {
2064                pr_err("%s: ib_register fails with %d\n", __func__, rc);
2065                goto out_sock;
2066        }
2067
2068        static_branch_enable(&tcp_have_smc);
2069        return 0;
2070
2071out_sock:
2072        sock_unregister(PF_SMC);
2073out_proto6:
2074        proto_unregister(&smc_proto6);
2075out_proto:
2076        proto_unregister(&smc_proto);
2077out_pnet:
2078        smc_pnet_exit();
2079out_pernet_subsys:
2080        unregister_pernet_subsys(&smc_net_ops);
2081
2082        return rc;
2083}
2084
2085static void __exit smc_exit(void)
2086{
2087        smc_core_exit();
2088        static_branch_disable(&tcp_have_smc);
2089        smc_ib_unregister_client();
2090        sock_unregister(PF_SMC);
2091        proto_unregister(&smc_proto6);
2092        proto_unregister(&smc_proto);
2093        smc_pnet_exit();
2094        unregister_pernet_subsys(&smc_net_ops);
2095}
2096
2097module_init(smc_init);
2098module_exit(smc_exit);
2099
2100MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
2101MODULE_DESCRIPTION("smc socket address family");
2102MODULE_LICENSE("GPL");
2103MODULE_ALIAS_NETPROTO(PF_SMC);
2104