linux/net/smc/af_smc.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
   6 *  - applies to SOCK_STREAM sockets only
   7 *  - offers an alternative communication option for TCP-protocol sockets
   8 *  - applicable with RoCE-cards only
   9 *
  10 *  Initial restrictions:
  11 *    - support for alternate links postponed
  12 *
  13 *  Copyright IBM Corp. 2016, 2018
  14 *
  15 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  16 *              based on prototype from Frank Blaschka
  17 */
  18
  19#define KMSG_COMPONENT "smc"
  20#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  21
  22#include <linux/module.h>
  23#include <linux/socket.h>
  24#include <linux/workqueue.h>
  25#include <linux/in.h>
  26#include <linux/sched/signal.h>
  27#include <linux/if_vlan.h>
  28
  29#include <net/sock.h>
  30#include <net/tcp.h>
  31#include <net/smc.h>
  32#include <asm/ioctls.h>
  33
  34#include <net/net_namespace.h>
  35#include <net/netns/generic.h>
  36#include "smc_netns.h"
  37
  38#include "smc.h"
  39#include "smc_clc.h"
  40#include "smc_llc.h"
  41#include "smc_cdc.h"
  42#include "smc_core.h"
  43#include "smc_ib.h"
  44#include "smc_ism.h"
  45#include "smc_pnet.h"
  46#include "smc_tx.h"
  47#include "smc_rx.h"
  48#include "smc_close.h"
  49
  50static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link group
  51                                                 * creation on server
  52                                                 */
  53static DEFINE_MUTEX(smc_client_lgr_pending);    /* serialize link group
  54                                                 * creation on client
  55                                                 */
  56
  57static void smc_tcp_listen_work(struct work_struct *);
  58static void smc_connect_work(struct work_struct *);
  59
  60static void smc_set_keepalive(struct sock *sk, int val)
  61{
  62        struct smc_sock *smc = smc_sk(sk);
  63
  64        smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
  65}
  66
  67static struct smc_hashinfo smc_v4_hashinfo = {
  68        .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
  69};
  70
  71static struct smc_hashinfo smc_v6_hashinfo = {
  72        .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
  73};
  74
  75int smc_hash_sk(struct sock *sk)
  76{
  77        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  78        struct hlist_head *head;
  79
  80        head = &h->ht;
  81
  82        write_lock_bh(&h->lock);
  83        sk_add_node(sk, head);
  84        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
  85        write_unlock_bh(&h->lock);
  86
  87        return 0;
  88}
  89EXPORT_SYMBOL_GPL(smc_hash_sk);
  90
  91void smc_unhash_sk(struct sock *sk)
  92{
  93        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
  94
  95        write_lock_bh(&h->lock);
  96        if (sk_del_node_init(sk))
  97                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
  98        write_unlock_bh(&h->lock);
  99}
 100EXPORT_SYMBOL_GPL(smc_unhash_sk);
 101
 102struct proto smc_proto = {
 103        .name           = "SMC",
 104        .owner          = THIS_MODULE,
 105        .keepalive      = smc_set_keepalive,
 106        .hash           = smc_hash_sk,
 107        .unhash         = smc_unhash_sk,
 108        .obj_size       = sizeof(struct smc_sock),
 109        .h.smc_hash     = &smc_v4_hashinfo,
 110        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 111};
 112EXPORT_SYMBOL_GPL(smc_proto);
 113
 114struct proto smc_proto6 = {
 115        .name           = "SMC6",
 116        .owner          = THIS_MODULE,
 117        .keepalive      = smc_set_keepalive,
 118        .hash           = smc_hash_sk,
 119        .unhash         = smc_unhash_sk,
 120        .obj_size       = sizeof(struct smc_sock),
 121        .h.smc_hash     = &smc_v6_hashinfo,
 122        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
 123};
 124EXPORT_SYMBOL_GPL(smc_proto6);
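
/* Illustrative sketch (not part of this file): from user space the protos
 * defined above are selected via the third socket() argument, mirroring TCP:
 *
 *	sd  = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);    selects smc_proto
 *	sd6 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);   selects smc_proto6
 *
 * All further calls (bind/listen/connect/accept/send/recv) keep the usual
 * SOCK_STREAM semantics; on fallback they are served by the internal TCP
 * "clcsock".
 */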
 125
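/* smc_release() - close an SMC socket
 *
 * Aborts a dangling nonblocking connect, then either performs an active
 * close (native SMC) or marks the socket closed (fallback; a listening
 * clcsock is shut down to wake up pending accepts). Once the socket is in
 * SMC_CLOSED the clcsock is released and the SMC connection, if any, is
 * freed, before the final socket reference is dropped.
 */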
 126static int smc_release(struct socket *sock)
 127{
 128        struct sock *sk = sock->sk;
 129        struct smc_sock *smc;
 130        int rc = 0;
 131
 132        if (!sk)
 133                goto out;
 134
 135        smc = smc_sk(sk);
 136
 137        /* cleanup for a dangling non-blocking connect */
 138        if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
 139                tcp_abort(smc->clcsock->sk, ECONNABORTED);
 140        flush_work(&smc->connect_work);
 141
 142        if (sk->sk_state == SMC_LISTEN)
 143                /* smc_close_non_accepted() is called and acquires
 144                 * sock lock for child sockets again
 145                 */
 146                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 147        else
 148                lock_sock(sk);
 149
 150        if (!smc->use_fallback) {
 151                rc = smc_close_active(smc);
 152                sock_set_flag(sk, SOCK_DEAD);
 153                sk->sk_shutdown |= SHUTDOWN_MASK;
 154        } else {
 155                if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
 156                        sock_put(sk); /* passive closing */
 157                if (sk->sk_state == SMC_LISTEN) {
 158                        /* wake up clcsock accept */
 159                        rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
 160                }
 161                sk->sk_state = SMC_CLOSED;
 162                sk->sk_state_change(sk);
 163        }
 164
 165        sk->sk_prot->unhash(sk);
 166
 167        if (sk->sk_state == SMC_CLOSED) {
 168                if (smc->clcsock) {
 169                        release_sock(sk);
 170                        smc_clcsock_release(smc);
 171                        lock_sock(sk);
 172                }
 173                if (!smc->use_fallback)
 174                        smc_conn_free(&smc->conn);
 175        }
 176
 177        /* detach socket */
 178        sock_orphan(sk);
 179        sock->sk = NULL;
 180        release_sock(sk);
 181
 182        sock_put(sk); /* final sock_put */
 183out:
 184        return rc;
 185}
 186
 187static void smc_destruct(struct sock *sk)
 188{
 189        if (sk->sk_state != SMC_CLOSED)
 190                return;
 191        if (!sock_flag(sk, SOCK_DEAD))
 192                return;
 193
 194        sk_refcnt_debug_dec(sk);
 195}
 196
 197static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 198                                   int protocol)
 199{
 200        struct smc_sock *smc;
 201        struct proto *prot;
 202        struct sock *sk;
 203
 204        prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
 205        sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
 206        if (!sk)
 207                return NULL;
 208
 209        sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
 210        sk->sk_state = SMC_INIT;
 211        sk->sk_destruct = smc_destruct;
 212        sk->sk_protocol = protocol;
 213        smc = smc_sk(sk);
 214        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
 215        INIT_WORK(&smc->connect_work, smc_connect_work);
 216        INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 217        INIT_LIST_HEAD(&smc->accept_q);
 218        spin_lock_init(&smc->accept_q_lock);
 219        spin_lock_init(&smc->conn.send_lock);
 220        sk->sk_prot->hash(sk);
 221        sk_refcnt_debug_inc(sk);
 222        mutex_init(&smc->clcsock_release_lock);
 223
 224        return sk;
 225}
 226
 227static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
 228                    int addr_len)
 229{
 230        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 231        struct sock *sk = sock->sk;
 232        struct smc_sock *smc;
 233        int rc;
 234
 235        smc = smc_sk(sk);
 236
 237        /* replicate tests from inet_bind(), to be safe wrt. future changes */
 238        rc = -EINVAL;
 239        if (addr_len < sizeof(struct sockaddr_in))
 240                goto out;
 241
 242        rc = -EAFNOSUPPORT;
 243        if (addr->sin_family != AF_INET &&
 244            addr->sin_family != AF_INET6 &&
 245            addr->sin_family != AF_UNSPEC)
 246                goto out;
 247        /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
 248        if (addr->sin_family == AF_UNSPEC &&
 249            addr->sin_addr.s_addr != htonl(INADDR_ANY))
 250                goto out;
 251
 252        lock_sock(sk);
 253
 254        /* Check if socket is already active */
 255        rc = -EINVAL;
 256        if (sk->sk_state != SMC_INIT)
 257                goto out_rel;
 258
 259        smc->clcsock->sk->sk_reuse = sk->sk_reuse;
 260        rc = kernel_bind(smc->clcsock, uaddr, addr_len);
 261
 262out_rel:
 263        release_sock(sk);
 264out:
 265        return rc;
 266}
 267
 268static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 269                                   unsigned long mask)
 270{
  271        /* options for which we do not get a setsockopt callback */
 272        nsk->sk_type = osk->sk_type;
 273        nsk->sk_sndbuf = osk->sk_sndbuf;
 274        nsk->sk_rcvbuf = osk->sk_rcvbuf;
 275        nsk->sk_sndtimeo = osk->sk_sndtimeo;
 276        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
 277        nsk->sk_mark = osk->sk_mark;
 278        nsk->sk_priority = osk->sk_priority;
 279        nsk->sk_rcvlowat = osk->sk_rcvlowat;
 280        nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
 281        nsk->sk_err = osk->sk_err;
 282
 283        nsk->sk_flags &= ~mask;
 284        nsk->sk_flags |= osk->sk_flags & mask;
 285}
 286
 287#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
 288                             (1UL << SOCK_KEEPOPEN) | \
 289                             (1UL << SOCK_LINGER) | \
 290                             (1UL << SOCK_BROADCAST) | \
 291                             (1UL << SOCK_TIMESTAMP) | \
 292                             (1UL << SOCK_DBG) | \
 293                             (1UL << SOCK_RCVTSTAMP) | \
 294                             (1UL << SOCK_RCVTSTAMPNS) | \
 295                             (1UL << SOCK_LOCALROUTE) | \
 296                             (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
 297                             (1UL << SOCK_RXQ_OVFL) | \
 298                             (1UL << SOCK_WIFI_STATUS) | \
 299                             (1UL << SOCK_NOFCS) | \
 300                             (1UL << SOCK_FILTER_LOCKED) | \
 301                             (1UL << SOCK_TSTAMP_NEW))
 302/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 303 * clc socket (since smc is not called for these options from net/core)
 304 */
 305static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 306{
 307        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
 308}
 309
 310#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
 311                             (1UL << SOCK_KEEPOPEN) | \
 312                             (1UL << SOCK_LINGER) | \
 313                             (1UL << SOCK_DBG))
 314/* copy only settings and flags relevant for smc from clc to smc socket */
 315static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 316{
 317        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 318}
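
/* For instance, a setsockopt(SO_OOBINLINE) on the SMC socket is handled
 * entirely in net/core and only sets SOCK_URGINLINE in sk_flags; the mask
 * copy above mirrors it onto the internal TCP clcsock before connect() or
 * listen(), so the CLC/TCP side sees the same configuration.
 */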
 319
 320/* register a new rmb, send confirm_rkey msg to register with peer */
 321static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
 322                       bool conf_rkey)
 323{
 324        if (!rmb_desc->wr_reg) {
 325                /* register memory region for new rmb */
 326                if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
 327                        rmb_desc->regerr = 1;
 328                        return -EFAULT;
 329                }
 330                rmb_desc->wr_reg = 1;
 331        }
 332        if (!conf_rkey)
 333                return 0;
 334        /* exchange confirm_rkey msg with peer */
 335        if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
 336                rmb_desc->regerr = 1;
 337                return -EFAULT;
 338        }
 339        return 0;
 340}
 341
 342static int smc_clnt_conf_first_link(struct smc_sock *smc)
 343{
 344        struct net *net = sock_net(smc->clcsock->sk);
 345        struct smc_link_group *lgr = smc->conn.lgr;
 346        struct smc_link *link;
 347        int rest;
 348        int rc;
 349
 350        link = &lgr->lnk[SMC_SINGLE_LINK];
 351        /* receive CONFIRM LINK request from server over RoCE fabric */
 352        rest = wait_for_completion_interruptible_timeout(
 353                &link->llc_confirm,
 354                SMC_LLC_WAIT_FIRST_TIME);
 355        if (rest <= 0) {
 356                struct smc_clc_msg_decline dclc;
 357
 358                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 359                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 360                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 361        }
 362
 363        if (link->llc_confirm_rc)
 364                return SMC_CLC_DECL_RMBE_EC;
 365
 366        rc = smc_ib_modify_qp_rts(link);
 367        if (rc)
 368                return SMC_CLC_DECL_ERR_RDYLNK;
 369
 370        smc_wr_remember_qp_attr(link);
 371
 372        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
 373                return SMC_CLC_DECL_ERR_REGRMB;
 374
 375        /* send CONFIRM LINK response over RoCE fabric */
 376        rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 377        if (rc < 0)
 378                return SMC_CLC_DECL_TIMEOUT_CL;
 379
 380        /* receive ADD LINK request from server over RoCE fabric */
 381        rest = wait_for_completion_interruptible_timeout(&link->llc_add,
 382                                                         SMC_LLC_WAIT_TIME);
 383        if (rest <= 0) {
 384                struct smc_clc_msg_decline dclc;
 385
 386                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 387                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 388                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 389        }
 390
 391        /* send add link reject message, only one link supported for now */
 392        rc = smc_llc_send_add_link(link,
 393                                   link->smcibdev->mac[link->ibport - 1],
 394                                   link->gid, SMC_LLC_RESP);
 395        if (rc < 0)
 396                return SMC_CLC_DECL_TIMEOUT_AL;
 397
 398        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 399
 400        return 0;
 401}
 402
 403static void smcr_conn_save_peer_info(struct smc_sock *smc,
 404                                     struct smc_clc_msg_accept_confirm *clc)
 405{
 406        int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
 407
 408        smc->conn.peer_rmbe_idx = clc->rmbe_idx;
 409        smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
 410        smc->conn.peer_rmbe_size = bufsize;
 411        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 412        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
 413}
 414
 415static void smcd_conn_save_peer_info(struct smc_sock *smc,
 416                                     struct smc_clc_msg_accept_confirm *clc)
 417{
 418        int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
 419
 420        smc->conn.peer_rmbe_idx = clc->dmbe_idx;
 421        smc->conn.peer_token = clc->token;
 422        /* msg header takes up space in the buffer */
 423        smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
 424        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
 425        smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
 426}
 427
 428static void smc_conn_save_peer_info(struct smc_sock *smc,
 429                                    struct smc_clc_msg_accept_confirm *clc)
 430{
 431        if (smc->conn.lgr->is_smcd)
 432                smcd_conn_save_peer_info(smc, clc);
 433        else
 434                smcr_conn_save_peer_info(smc, clc);
 435}
 436
 437static void smc_link_save_peer_info(struct smc_link *link,
 438                                    struct smc_clc_msg_accept_confirm *clc)
 439{
 440        link->peer_qpn = ntoh24(clc->qpn);
 441        memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
 442        memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
 443        link->peer_psn = ntoh24(clc->psn);
 444        link->peer_mtu = clc->qp_mtu;
 445}
 446
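/* smc_switch_to_fallback() - hand the connection over to plain TCP
 *
 * Marks the socket as using the fallback path and attaches the application's
 * struct file to the internal clcsock, so that subsequent fd-based lookups
 * (poll etc.) resolve to the TCP socket.
 */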
 447static void smc_switch_to_fallback(struct smc_sock *smc)
 448{
 449        smc->use_fallback = true;
 450        if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
 451                smc->clcsock->file = smc->sk.sk_socket->file;
 452                smc->clcsock->file->private_data = smc->clcsock;
 453        }
 454}
 455
 456/* fall back during connect */
 457static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 458{
 459        smc_switch_to_fallback(smc);
 460        smc->fallback_rsn = reason_code;
 461        smc_copy_sock_settings_to_clc(smc);
 462        smc->connect_nonblock = 0;
 463        if (smc->sk.sk_state == SMC_INIT)
 464                smc->sk.sk_state = SMC_ACTIVE;
 465        return 0;
 466}
 467
 468/* decline and fall back during connect */
 469static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 470{
 471        int rc;
 472
 473        if (reason_code < 0) { /* error, fallback is not possible */
 474                if (smc->sk.sk_state == SMC_INIT)
 475                        sock_put(&smc->sk); /* passive closing */
 476                return reason_code;
 477        }
 478        if (reason_code != SMC_CLC_DECL_PEERDECL) {
 479                rc = smc_clc_send_decline(smc, reason_code);
 480                if (rc < 0) {
 481                        if (smc->sk.sk_state == SMC_INIT)
 482                                sock_put(&smc->sk); /* passive closing */
 483                        return rc;
 484                }
 485        }
 486        return smc_connect_fallback(smc, reason_code);
 487}
 488
 489/* abort connecting */
 490static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 491                             int local_contact)
 492{
 493        if (local_contact == SMC_FIRST_CONTACT)
 494                smc_lgr_forget(smc->conn.lgr);
 495        if (smc->conn.lgr->is_smcd)
 496                /* there is only one lgr role for SMC-D; use server lock */
 497                mutex_unlock(&smc_server_lgr_pending);
 498        else
 499                mutex_unlock(&smc_client_lgr_pending);
 500
 501        smc_conn_free(&smc->conn);
 502        smc->connect_nonblock = 0;
 503        return reason_code;
 504}
 505
 506/* check if there is a rdma device available for this connection. */
 507/* called for connect and listen */
 508static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
 509{
 510        /* PNET table look up: search active ib_device and port
 511         * within same PNETID that also contains the ethernet device
 512         * used for the internal TCP socket
 513         */
 514        smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
 515        if (!ini->ib_dev)
 516                return SMC_CLC_DECL_NOSMCRDEV;
 517        return 0;
 518}
 519
 520/* check if there is an ISM device available for this connection. */
 521/* called for connect and listen */
 522static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
 523{
 524        /* Find ISM device with same PNETID as connecting interface  */
 525        smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
 526        if (!ini->ism_dev)
 527                return SMC_CLC_DECL_NOSMCDDEV;
 528        return 0;
 529}
 530
 531/* Check for VLAN ID and register it on ISM device just for CLC handshake */
 532static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
 533                                      struct smc_init_info *ini)
 534{
 535        if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
 536                return SMC_CLC_DECL_ISMVLANERR;
 537        return 0;
 538}
 539
 540/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 541 * used, the VLAN ID will be registered again during the connection setup.
 542 */
 543static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
 544                                        struct smc_init_info *ini)
 545{
 546        if (!is_smcd)
 547                return 0;
 548        if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
 549                return SMC_CLC_DECL_CNFERR;
 550        return 0;
 551}
 552
 553/* CLC handshake during connect */
 554static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 555                           struct smc_clc_msg_accept_confirm *aclc,
 556                           struct smc_init_info *ini)
 557{
 558        int rc = 0;
 559
 560        /* do inband token exchange */
 561        rc = smc_clc_send_proposal(smc, smc_type, ini);
 562        if (rc)
 563                return rc;
 564        /* receive SMC Accept CLC message */
 565        return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
 566                                CLC_WAIT_TIME);
 567}
 568
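/* Client-side SMC-R setup, step by step (see smc_connect_rdma() below):
 * create or reuse a link group under smc_client_lgr_pending, save the peer's
 * accept data, allocate send buffer and RMB, save the peer's rtoken, bring
 * the link to ready state (first contact) or register the RMB with the peer,
 * send the CLC CONFIRM message and, on first contact, run the LLC CONFIRM
 * LINK / ADD LINK handshake over the RoCE fabric.
 */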
 569/* setup for RDMA connection of client */
 570static int smc_connect_rdma(struct smc_sock *smc,
 571                            struct smc_clc_msg_accept_confirm *aclc,
 572                            struct smc_init_info *ini)
 573{
 574        struct smc_link *link;
 575        int reason_code = 0;
 576
 577        ini->is_smcd = false;
 578        ini->ib_lcl = &aclc->lcl;
 579        ini->ib_clcqpn = ntoh24(aclc->qpn);
 580        ini->srv_first_contact = aclc->hdr.flag;
 581
 582        mutex_lock(&smc_client_lgr_pending);
 583        reason_code = smc_conn_create(smc, ini);
 584        if (reason_code) {
 585                mutex_unlock(&smc_client_lgr_pending);
 586                return reason_code;
 587        }
 588        link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
 589
 590        smc_conn_save_peer_info(smc, aclc);
 591
 592        /* create send buffer and rmb */
 593        if (smc_buf_create(smc, false))
 594                return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
 595                                         ini->cln_first_contact);
 596
 597        if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 598                smc_link_save_peer_info(link, aclc);
 599
 600        if (smc_rmb_rtoken_handling(&smc->conn, aclc))
 601                return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 602                                         ini->cln_first_contact);
 603
 604        smc_close_init(smc);
 605        smc_rx_init(smc);
 606
 607        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 608                if (smc_ib_ready_link(link))
 609                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 610                                                 ini->cln_first_contact);
 611        } else {
 612                if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
 613                        return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 614                                                 ini->cln_first_contact);
 615        }
 616        smc_rmb_sync_sg_for_device(&smc->conn);
 617
 618        reason_code = smc_clc_send_confirm(smc);
 619        if (reason_code)
 620                return smc_connect_abort(smc, reason_code,
 621                                         ini->cln_first_contact);
 622
 623        smc_tx_init(smc);
 624
 625        if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 626                /* QP confirmation over RoCE fabric */
 627                reason_code = smc_clnt_conf_first_link(smc);
 628                if (reason_code)
 629                        return smc_connect_abort(smc, reason_code,
 630                                                 ini->cln_first_contact);
 631        }
 632        mutex_unlock(&smc_client_lgr_pending);
 633
 634        smc_copy_sock_settings_to_clc(smc);
 635        smc->connect_nonblock = 0;
 636        if (smc->sk.sk_state == SMC_INIT)
 637                smc->sk.sk_state = SMC_ACTIVE;
 638
 639        return 0;
 640}
 641
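/* The SMC-D variant below is simpler than the SMC-R path above: data is
 * exchanged through an ISM device within the same machine, so no RoCE link
 * is brought up, no rtoken is exchanged and no LLC handshake is needed; only
 * the CLC confirm message is sent, while holding smc_server_lgr_pending
 * (SMC-D knows only one link group role).
 */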
 642/* setup for ISM connection of client */
 643static int smc_connect_ism(struct smc_sock *smc,
 644                           struct smc_clc_msg_accept_confirm *aclc,
 645                           struct smc_init_info *ini)
 646{
 647        int rc = 0;
 648
 649        ini->is_smcd = true;
 650        ini->ism_gid = aclc->gid;
 651        ini->srv_first_contact = aclc->hdr.flag;
 652
 653        /* there is only one lgr role for SMC-D; use server lock */
 654        mutex_lock(&smc_server_lgr_pending);
 655        rc = smc_conn_create(smc, ini);
 656        if (rc) {
 657                mutex_unlock(&smc_server_lgr_pending);
 658                return rc;
 659        }
 660
 661        /* Create send and receive buffers */
 662        if (smc_buf_create(smc, true))
 663                return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
 664                                         ini->cln_first_contact);
 665
 666        smc_conn_save_peer_info(smc, aclc);
 667        smc_close_init(smc);
 668        smc_rx_init(smc);
 669        smc_tx_init(smc);
 670
 671        rc = smc_clc_send_confirm(smc);
 672        if (rc)
 673                return smc_connect_abort(smc, rc, ini->cln_first_contact);
 674        mutex_unlock(&smc_server_lgr_pending);
 675
 676        smc_copy_sock_settings_to_clc(smc);
 677        smc->connect_nonblock = 0;
 678        if (smc->sk.sk_state == SMC_INIT)
 679                smc->sk.sk_state = SMC_ACTIVE;
 680
 681        return 0;
 682}
 683
 684/* perform steps before actually connecting */
 685static int __smc_connect(struct smc_sock *smc)
 686{
 687        bool ism_supported = false, rdma_supported = false;
 688        struct smc_clc_msg_accept_confirm aclc;
 689        struct smc_init_info ini = {0};
 690        int smc_type;
 691        int rc = 0;
 692
 693        sock_hold(&smc->sk); /* sock put in passive closing */
 694
 695        if (smc->use_fallback)
 696                return smc_connect_fallback(smc, smc->fallback_rsn);
 697
 698        /* if peer has not signalled SMC-capability, fall back */
 699        if (!tcp_sk(smc->clcsock->sk)->syn_smc)
 700                return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 701
 702        /* IPSec connections opt out of SMC-R optimizations */
 703        if (using_ipsec(smc))
 704                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
 705
 706        /* get vlan id from IP device */
 707        if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
 708                return smc_connect_decline_fallback(smc,
 709                                                    SMC_CLC_DECL_GETVLANERR);
 710
 711        /* check if there is an ism device available */
 712        if (!smc_find_ism_device(smc, &ini) &&
 713            !smc_connect_ism_vlan_setup(smc, &ini)) {
 714                /* ISM is supported for this connection */
 715                ism_supported = true;
 716                smc_type = SMC_TYPE_D;
 717        }
 718
 719        /* check if there is a rdma device available */
 720        if (!smc_find_rdma_device(smc, &ini)) {
 721                /* RDMA is supported for this connection */
 722                rdma_supported = true;
 723                if (ism_supported)
 724                        smc_type = SMC_TYPE_B; /* both */
 725                else
 726                        smc_type = SMC_TYPE_R; /* only RDMA */
 727        }
 728
 729        /* if neither ISM nor RDMA are supported, fallback */
 730        if (!rdma_supported && !ism_supported)
 731                return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 732
 733        /* perform CLC handshake */
 734        rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
 735        if (rc) {
 736                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 737                return smc_connect_decline_fallback(smc, rc);
 738        }
 739
 740        /* depending on previous steps, connect using rdma or ism */
 741        if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
 742                rc = smc_connect_rdma(smc, &aclc, &ini);
 743        else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
 744                rc = smc_connect_ism(smc, &aclc, &ini);
 745        else
 746                rc = SMC_CLC_DECL_MODEUNSUPP;
 747        if (rc) {
 748                smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 749                return smc_connect_decline_fallback(smc, rc);
 750        }
 751
 752        smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
 753        return 0;
 754}
 755
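/* smc_connect_work() - complete a nonblocking connect
 *
 * Scheduled from smc_connect() when O_NONBLOCK is set: waits for the TCP
 * three-way handshake on the clcsock to finish (bounded by sk_sndtimeo, if
 * set), then runs __smc_connect(). Errors are reported through sk_err and
 * pollers are woken via sk_write_space / sk_state_change.
 */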
 756static void smc_connect_work(struct work_struct *work)
 757{
 758        struct smc_sock *smc = container_of(work, struct smc_sock,
 759                                            connect_work);
 760        long timeo = smc->sk.sk_sndtimeo;
 761        int rc = 0;
 762
 763        if (!timeo)
 764                timeo = MAX_SCHEDULE_TIMEOUT;
 765        lock_sock(smc->clcsock->sk);
 766        if (smc->clcsock->sk->sk_err) {
 767                smc->sk.sk_err = smc->clcsock->sk->sk_err;
 768        } else if ((1 << smc->clcsock->sk->sk_state) &
  769                                        (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 770                rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
 771                if ((rc == -EPIPE) &&
 772                    ((1 << smc->clcsock->sk->sk_state) &
 773                                        (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
 774                        rc = 0;
 775        }
 776        release_sock(smc->clcsock->sk);
 777        lock_sock(&smc->sk);
 778        if (rc != 0 || smc->sk.sk_err) {
 779                smc->sk.sk_state = SMC_CLOSED;
 780                if (rc == -EPIPE || rc == -EAGAIN)
 781                        smc->sk.sk_err = EPIPE;
 782                else if (signal_pending(current))
 783                        smc->sk.sk_err = -sock_intr_errno(timeo);
 784                goto out;
 785        }
 786
 787        rc = __smc_connect(smc);
 788        if (rc < 0)
 789                smc->sk.sk_err = -rc;
 790
 791out:
 792        if (!sock_flag(&smc->sk, SOCK_DEAD)) {
 793                if (smc->sk.sk_err) {
 794                        smc->sk.sk_state_change(&smc->sk);
 795                } else { /* allow polling before and after fallback decision */
 796                        smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
 797                        smc->sk.sk_write_space(&smc->sk);
 798                }
 799        }
 800        release_sock(&smc->sk);
 801}
 802
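/* smc_connect() - connect() entry point of AF_SMC
 *
 * Validates the address, mirrors socket options to the clcsock, sets the
 * syn_smc TCP option and connects the clcsock. With O_NONBLOCK the SMC
 * handshake is deferred to smc_connect_work() and -EINPROGRESS is returned,
 * e.g. (illustrative user space sketch, not part of this file):
 *
 *	fcntl(sd, F_SETFL, O_NONBLOCK);
 *	connect(sd, (struct sockaddr *)&sin, sizeof(sin));  errno EINPROGRESS
 *	poll(&pfd, 1, -1);     writable once smc_connect_work() has finished
 */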
 803static int smc_connect(struct socket *sock, struct sockaddr *addr,
 804                       int alen, int flags)
 805{
 806        struct sock *sk = sock->sk;
 807        struct smc_sock *smc;
 808        int rc = -EINVAL;
 809
 810        smc = smc_sk(sk);
 811
 812        /* separate smc parameter checking to be safe */
 813        if (alen < sizeof(addr->sa_family))
 814                goto out_err;
 815        if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
 816                goto out_err;
 817
 818        lock_sock(sk);
 819        switch (sk->sk_state) {
 820        default:
 821                goto out;
 822        case SMC_ACTIVE:
 823                rc = -EISCONN;
 824                goto out;
 825        case SMC_INIT:
 826                rc = 0;
 827                break;
 828        }
 829
 830        smc_copy_sock_settings_to_clc(smc);
 831        tcp_sk(smc->clcsock->sk)->syn_smc = 1;
 832        if (smc->connect_nonblock) {
 833                rc = -EALREADY;
 834                goto out;
 835        }
 836        rc = kernel_connect(smc->clcsock, addr, alen, flags);
 837        if (rc && rc != -EINPROGRESS)
 838                goto out;
 839        if (flags & O_NONBLOCK) {
 840                if (schedule_work(&smc->connect_work))
 841                        smc->connect_nonblock = 1;
 842                rc = -EINPROGRESS;
 843        } else {
 844                rc = __smc_connect(smc);
 845                if (rc < 0)
 846                        goto out;
 847                else
 848                        rc = 0; /* success cases including fallback */
 849        }
 850
 851out:
 852        release_sock(sk);
 853out_err:
 854        return rc;
 855}
 856
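/* smc_clcsock_accept() - accept one TCP connection on the listening clcsock
 *
 * Allocates a companion SMC socket first (the listen sock lock is dropped
 * across the allocation and the kernel_accept() call, which runs under
 * clcsock_release_lock). On error, or if the listening socket was closed in
 * the meantime, the new socket is torn down again and *new_smc is set to
 * NULL.
 */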
 857static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
 858{
 859        struct socket *new_clcsock = NULL;
 860        struct sock *lsk = &lsmc->sk;
 861        struct sock *new_sk;
 862        int rc = -EINVAL;
 863
 864        release_sock(lsk);
 865        new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
 866        if (!new_sk) {
 867                rc = -ENOMEM;
 868                lsk->sk_err = ENOMEM;
 869                *new_smc = NULL;
 870                lock_sock(lsk);
 871                goto out;
 872        }
 873        *new_smc = smc_sk(new_sk);
 874
 875        mutex_lock(&lsmc->clcsock_release_lock);
 876        if (lsmc->clcsock)
 877                rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
 878        mutex_unlock(&lsmc->clcsock_release_lock);
 879        lock_sock(lsk);
  880        if (rc < 0)
 881                lsk->sk_err = -rc;
 882        if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
 883                new_sk->sk_prot->unhash(new_sk);
 884                if (new_clcsock)
 885                        sock_release(new_clcsock);
 886                new_sk->sk_state = SMC_CLOSED;
 887                sock_set_flag(new_sk, SOCK_DEAD);
 888                sock_put(new_sk); /* final */
 889                *new_smc = NULL;
 890                goto out;
 891        }
 892
 893        (*new_smc)->clcsock = new_clcsock;
 894out:
 895        return rc;
 896}
 897
 898/* add a just created sock to the accept queue of the listen sock as
 899 * candidate for a following socket accept call from user space
 900 */
 901static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
 902{
 903        struct smc_sock *par = smc_sk(parent);
 904
  905        sock_hold(sk); /* sock_put in smc_accept_unlink() */
 906        spin_lock(&par->accept_q_lock);
 907        list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
 908        spin_unlock(&par->accept_q_lock);
 909        sk_acceptq_added(parent);
 910}
 911
 912/* remove a socket from the accept queue of its parental listening socket */
 913static void smc_accept_unlink(struct sock *sk)
 914{
 915        struct smc_sock *par = smc_sk(sk)->listen_smc;
 916
 917        spin_lock(&par->accept_q_lock);
 918        list_del_init(&smc_sk(sk)->accept_q);
 919        spin_unlock(&par->accept_q_lock);
 920        sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
 921        sock_put(sk); /* sock_hold in smc_accept_enqueue */
 922}
 923
 924/* remove a sock from the accept queue to bind it to a new socket created
 925 * for a socket accept call from user space
 926 */
 927struct sock *smc_accept_dequeue(struct sock *parent,
 928                                struct socket *new_sock)
 929{
 930        struct smc_sock *isk, *n;
 931        struct sock *new_sk;
 932
 933        list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
 934                new_sk = (struct sock *)isk;
 935
 936                smc_accept_unlink(new_sk);
 937                if (new_sk->sk_state == SMC_CLOSED) {
 938                        new_sk->sk_prot->unhash(new_sk);
 939                        if (isk->clcsock) {
 940                                sock_release(isk->clcsock);
 941                                isk->clcsock = NULL;
 942                        }
 943                        sock_put(new_sk); /* final */
 944                        continue;
 945                }
 946                if (new_sock) {
 947                        sock_graft(new_sk, new_sock);
 948                        if (isk->use_fallback) {
 949                                smc_sk(new_sk)->clcsock->file = new_sock->file;
 950                                isk->clcsock->file->private_data = isk->clcsock;
 951                        }
 952                }
 953                return new_sk;
 954        }
 955        return NULL;
 956}
 957
 958/* clean up for a created but never accepted sock */
 959void smc_close_non_accepted(struct sock *sk)
 960{
 961        struct smc_sock *smc = smc_sk(sk);
 962
 963        lock_sock(sk);
 964        if (!sk->sk_lingertime)
 965                /* wait for peer closing */
 966                sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
 967        if (!smc->use_fallback) {
 968                smc_close_active(smc);
 969                sock_set_flag(sk, SOCK_DEAD);
 970                sk->sk_shutdown |= SHUTDOWN_MASK;
 971        }
 972        sk->sk_prot->unhash(sk);
 973        if (smc->clcsock) {
 974                struct socket *tcp;
 975
 976                tcp = smc->clcsock;
 977                smc->clcsock = NULL;
 978                sock_release(tcp);
 979        }
 980        if (smc->use_fallback) {
 981                sock_put(sk); /* passive closing */
 982                sk->sk_state = SMC_CLOSED;
 983        } else {
 984                if (sk->sk_state == SMC_CLOSED)
 985                        smc_conn_free(&smc->conn);
 986        }
 987        release_sock(sk);
 988        sock_put(sk); /* final sock_put */
 989}
 990
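/* smc_serv_conf_first_link() - server side of the first-link LLC handshake
 *
 * Counterpart of smc_clnt_conf_first_link(): registers the RMB, sends the
 * LLC CONFIRM LINK request and waits for the response, then sends an ADD
 * LINK request (only a single link is supported) and waits for the client's
 * answer before marking the link active.
 */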
 991static int smc_serv_conf_first_link(struct smc_sock *smc)
 992{
 993        struct net *net = sock_net(smc->clcsock->sk);
 994        struct smc_link_group *lgr = smc->conn.lgr;
 995        struct smc_link *link;
 996        int rest;
 997        int rc;
 998
 999        link = &lgr->lnk[SMC_SINGLE_LINK];
1000
1001        if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
1002                return SMC_CLC_DECL_ERR_REGRMB;
1003
1004        /* send CONFIRM LINK request to client over the RoCE fabric */
1005        rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
1006        if (rc < 0)
1007                return SMC_CLC_DECL_TIMEOUT_CL;
1008
1009        /* receive CONFIRM LINK response from client over the RoCE fabric */
1010        rest = wait_for_completion_interruptible_timeout(
1011                &link->llc_confirm_resp,
1012                SMC_LLC_WAIT_FIRST_TIME);
1013        if (rest <= 0) {
1014                struct smc_clc_msg_decline dclc;
1015
1016                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1017                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1018                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
1019        }
1020
1021        if (link->llc_confirm_resp_rc)
1022                return SMC_CLC_DECL_RMBE_EC;
1023
1024        /* send ADD LINK request to client over the RoCE fabric */
1025        rc = smc_llc_send_add_link(link,
1026                                   link->smcibdev->mac[link->ibport - 1],
1027                                   link->gid, SMC_LLC_REQ);
1028        if (rc < 0)
1029                return SMC_CLC_DECL_TIMEOUT_AL;
1030
1031        /* receive ADD LINK response from client over the RoCE fabric */
1032        rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
1033                                                         SMC_LLC_WAIT_TIME);
1034        if (rest <= 0) {
1035                struct smc_clc_msg_decline dclc;
1036
1037                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
1038                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
1039                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
1040        }
1041
1042        smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
1043
1044        return 0;
1045}
1046
1047/* listen worker: finish */
1048static void smc_listen_out(struct smc_sock *new_smc)
1049{
1050        struct smc_sock *lsmc = new_smc->listen_smc;
1051        struct sock *newsmcsk = &new_smc->sk;
1052
1053        if (lsmc->sk.sk_state == SMC_LISTEN) {
1054                lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
1055                smc_accept_enqueue(&lsmc->sk, newsmcsk);
1056                release_sock(&lsmc->sk);
1057        } else { /* no longer listening */
1058                smc_close_non_accepted(newsmcsk);
1059        }
1060
1061        /* Wake up accept */
1062        lsmc->sk.sk_data_ready(&lsmc->sk);
1063        sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
1064}
1065
1066/* listen worker: finish in state connected */
1067static void smc_listen_out_connected(struct smc_sock *new_smc)
1068{
1069        struct sock *newsmcsk = &new_smc->sk;
1070
1071        sk_refcnt_debug_inc(newsmcsk);
1072        if (newsmcsk->sk_state == SMC_INIT)
1073                newsmcsk->sk_state = SMC_ACTIVE;
1074
1075        smc_listen_out(new_smc);
1076}
1077
1078/* listen worker: finish in error state */
1079static void smc_listen_out_err(struct smc_sock *new_smc)
1080{
1081        struct sock *newsmcsk = &new_smc->sk;
1082
1083        if (newsmcsk->sk_state == SMC_INIT)
1084                sock_put(&new_smc->sk); /* passive closing */
1085        newsmcsk->sk_state = SMC_CLOSED;
1086        smc_conn_free(&new_smc->conn);
1087
1088        smc_listen_out(new_smc);
1089}
1090
1091/* listen worker: decline and fall back if possible */
1092static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
1093                               int local_contact)
1094{
1095        /* RDMA setup failed, switch back to TCP */
1096        if (local_contact == SMC_FIRST_CONTACT)
1097                smc_lgr_forget(new_smc->conn.lgr);
1098        if (reason_code < 0) { /* error, no fallback possible */
1099                smc_listen_out_err(new_smc);
1100                return;
1101        }
1102        smc_conn_free(&new_smc->conn);
1103        smc_switch_to_fallback(new_smc);
1104        new_smc->fallback_rsn = reason_code;
1105        if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
1106                if (smc_clc_send_decline(new_smc, reason_code) < 0) {
1107                        smc_listen_out_err(new_smc);
1108                        return;
1109                }
1110        }
1111        smc_listen_out_connected(new_smc);
1112}
1113
1114/* listen worker: check prefixes */
1115static int smc_listen_prfx_check(struct smc_sock *new_smc,
1116                                 struct smc_clc_msg_proposal *pclc)
1117{
1118        struct smc_clc_msg_proposal_prefix *pclc_prfx;
1119        struct socket *newclcsock = new_smc->clcsock;
1120
1121        pclc_prfx = smc_clc_proposal_get_prefix(pclc);
1122        if (smc_clc_prfx_match(newclcsock, pclc_prfx))
1123                return SMC_CLC_DECL_DIFFPREFIX;
1124
1125        return 0;
1126}
1127
1128/* listen worker: initialize connection and buffers */
1129static int smc_listen_rdma_init(struct smc_sock *new_smc,
1130                                struct smc_init_info *ini)
1131{
1132        int rc;
1133
1134        /* allocate connection / link group */
1135        rc = smc_conn_create(new_smc, ini);
1136        if (rc)
1137                return rc;
1138
1139        /* create send buffer and rmb */
1140        if (smc_buf_create(new_smc, false))
1141                return SMC_CLC_DECL_MEM;
1142
1143        return 0;
1144}
1145
1146/* listen worker: initialize connection and buffers for SMC-D */
1147static int smc_listen_ism_init(struct smc_sock *new_smc,
1148                               struct smc_clc_msg_proposal *pclc,
1149                               struct smc_init_info *ini)
1150{
1151        struct smc_clc_msg_smcd *pclc_smcd;
1152        int rc;
1153
1154        pclc_smcd = smc_get_clc_msg_smcd(pclc);
1155        ini->ism_gid = pclc_smcd->gid;
1156        rc = smc_conn_create(new_smc, ini);
1157        if (rc)
1158                return rc;
1159
1160        /* Check if peer can be reached via ISM device */
1161        if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1162                            new_smc->conn.lgr->vlan_id,
1163                            new_smc->conn.lgr->smcd)) {
1164                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1165                        smc_lgr_forget(new_smc->conn.lgr);
1166                smc_conn_free(&new_smc->conn);
1167                return SMC_CLC_DECL_SMCDNOTALK;
1168        }
1169
1170        /* Create send and receive buffers */
1171        if (smc_buf_create(new_smc, true)) {
1172                if (ini->cln_first_contact == SMC_FIRST_CONTACT)
1173                        smc_lgr_forget(new_smc->conn.lgr);
1174                smc_conn_free(&new_smc->conn);
1175                return SMC_CLC_DECL_MEM;
1176        }
1177
1178        return 0;
1179}
1180
1181/* listen worker: register buffers */
1182static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
1183{
1184        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1185
1186        if (local_contact != SMC_FIRST_CONTACT) {
1187                if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
1188                        return SMC_CLC_DECL_ERR_REGRMB;
1189        }
1190        smc_rmb_sync_sg_for_device(&new_smc->conn);
1191
1192        return 0;
1193}
1194
1195/* listen worker: finish RDMA setup */
1196static int smc_listen_rdma_finish(struct smc_sock *new_smc,
1197                                  struct smc_clc_msg_accept_confirm *cclc,
1198                                  int local_contact)
1199{
1200        struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
1201        int reason_code = 0;
1202
1203        if (local_contact == SMC_FIRST_CONTACT)
1204                smc_link_save_peer_info(link, cclc);
1205
1206        if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
1207                reason_code = SMC_CLC_DECL_ERR_RTOK;
1208                goto decline;
1209        }
1210
1211        if (local_contact == SMC_FIRST_CONTACT) {
1212                if (smc_ib_ready_link(link)) {
1213                        reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1214                        goto decline;
1215                }
1216                /* QP confirmation over RoCE fabric */
1217                reason_code = smc_serv_conf_first_link(new_smc);
1218                if (reason_code)
1219                        goto decline;
1220        }
1221        return 0;
1222
1223decline:
1224        smc_listen_decline(new_smc, reason_code, local_contact);
1225        return reason_code;
1226}
1227
1228/* setup for RDMA connection of server */
1229static void smc_listen_work(struct work_struct *work)
1230{
1231        struct smc_sock *new_smc = container_of(work, struct smc_sock,
1232                                                smc_listen_work);
1233        struct socket *newclcsock = new_smc->clcsock;
1234        struct smc_clc_msg_accept_confirm cclc;
1235        struct smc_clc_msg_proposal *pclc;
1236        struct smc_init_info ini = {0};
1237        bool ism_supported = false;
1238        u8 buf[SMC_CLC_MAX_LEN];
1239        int rc = 0;
1240
1241        if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
1242                return smc_listen_out_err(new_smc);
1243
1244        if (new_smc->use_fallback) {
1245                smc_listen_out_connected(new_smc);
1246                return;
1247        }
1248
1249        /* check if peer is smc capable */
1250        if (!tcp_sk(newclcsock->sk)->syn_smc) {
1251                smc_switch_to_fallback(new_smc);
1252                new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1253                smc_listen_out_connected(new_smc);
1254                return;
1255        }
1256
1257        /* do inband token exchange -
1258         * wait for and receive SMC Proposal CLC message
1259         */
1260        pclc = (struct smc_clc_msg_proposal *)&buf;
1261        rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
1262                              SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
1263        if (rc)
1264                goto out_decl;
1265
1266        /* IPSec connections opt out of SMC-R optimizations */
1267        if (using_ipsec(new_smc)) {
1268                rc = SMC_CLC_DECL_IPSEC;
1269                goto out_decl;
1270        }
1271
1272        /* check for matching IP prefix and subnet length */
1273        rc = smc_listen_prfx_check(new_smc, pclc);
1274        if (rc)
1275                goto out_decl;
1276
1277        /* get vlan id from IP device */
1278        if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
1279                rc = SMC_CLC_DECL_GETVLANERR;
1280                goto out_decl;
1281        }
1282
1283        mutex_lock(&smc_server_lgr_pending);
1284        smc_close_init(new_smc);
1285        smc_rx_init(new_smc);
1286        smc_tx_init(new_smc);
1287
1288        /* check if ISM is available */
1289        if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
1290                ini.is_smcd = true; /* prepare ISM check */
1291                rc = smc_find_ism_device(new_smc, &ini);
1292                if (!rc)
1293                        rc = smc_listen_ism_init(new_smc, pclc, &ini);
1294                if (!rc)
1295                        ism_supported = true;
1296                else if (pclc->hdr.path == SMC_TYPE_D)
1297                        goto out_unlock; /* skip RDMA and decline */
1298        }
1299
1300        /* check if RDMA is available */
1301        if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
1302                /* prepare RDMA check */
1303                memset(&ini, 0, sizeof(ini));
1304                ini.is_smcd = false;
1305                ini.ib_lcl = &pclc->lcl;
1306                rc = smc_find_rdma_device(new_smc, &ini);
1307                if (rc) {
1308                        /* no RDMA device found */
1309                        if (pclc->hdr.path == SMC_TYPE_B)
1310                                /* neither ISM nor RDMA device found */
1311                                rc = SMC_CLC_DECL_NOSMCDEV;
1312                        goto out_unlock;
1313                }
1314                rc = smc_listen_rdma_init(new_smc, &ini);
1315                if (rc)
1316                        goto out_unlock;
1317                rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
1318                if (rc)
1319                        goto out_unlock;
1320        }
1321
1322        /* send SMC Accept CLC message */
1323        rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
1324        if (rc)
1325                goto out_unlock;
1326
1327        /* SMC-D does not need this lock any more */
1328        if (ism_supported)
1329                mutex_unlock(&smc_server_lgr_pending);
1330
1331        /* receive SMC Confirm CLC message */
1332        rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
1333                              SMC_CLC_CONFIRM, CLC_WAIT_TIME);
1334        if (rc) {
1335                if (!ism_supported)
1336                        goto out_unlock;
1337                goto out_decl;
1338        }
1339
1340        /* finish worker */
1341        if (!ism_supported) {
1342                rc = smc_listen_rdma_finish(new_smc, &cclc,
1343                                            ini.cln_first_contact);
1344                mutex_unlock(&smc_server_lgr_pending);
1345                if (rc)
1346                        return;
1347        }
1348        smc_conn_save_peer_info(new_smc, &cclc);
1349        smc_listen_out_connected(new_smc);
1350        return;
1351
1352out_unlock:
1353        mutex_unlock(&smc_server_lgr_pending);
1354out_decl:
1355        smc_listen_decline(new_smc, rc, ini.cln_first_contact);
1356}
1357
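/* smc_tcp_listen_work() - accept loop on the listening clcsock
 *
 * Runs as long as the SMC socket stays in SMC_LISTEN: each TCP connection
 * accepted via smc_clcsock_accept() gets a fresh SMC socket that inherits
 * the listener's settings and is handed to smc_listen_work() for the CLC/LLC
 * handshake.
 */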
1358static void smc_tcp_listen_work(struct work_struct *work)
1359{
1360        struct smc_sock *lsmc = container_of(work, struct smc_sock,
1361                                             tcp_listen_work);
1362        struct sock *lsk = &lsmc->sk;
1363        struct smc_sock *new_smc;
1364        int rc = 0;
1365
1366        lock_sock(lsk);
1367        while (lsk->sk_state == SMC_LISTEN) {
1368                rc = smc_clcsock_accept(lsmc, &new_smc);
1369                if (rc)
1370                        goto out;
1371                if (!new_smc)
1372                        continue;
1373
1374                new_smc->listen_smc = lsmc;
1375                new_smc->use_fallback = lsmc->use_fallback;
1376                new_smc->fallback_rsn = lsmc->fallback_rsn;
1377                sock_hold(lsk); /* sock_put in smc_listen_work */
1378                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1379                smc_copy_sock_settings_to_smc(new_smc);
1380                new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
1381                new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
1382                sock_hold(&new_smc->sk); /* sock_put in passive closing */
1383                if (!schedule_work(&new_smc->smc_listen_work))
1384                        sock_put(&new_smc->sk);
1385        }
1386
1387out:
1388        release_sock(lsk);
1389        sock_put(&lsmc->sk); /* sock_hold in smc_listen */
1390}
1391
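/* smc_listen() - listen() entry point of AF_SMC
 *
 * Copies the SMC socket options to the clcsock, enables the syn_smc TCP
 * option unless the socket already fell back, puts the clcsock into listen
 * state and kicks off smc_tcp_listen_work(). A server therefore looks just
 * like its TCP counterpart (illustrative user space sketch):
 *
 *	lsd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	bind(lsd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(lsd, 128);
 *	csd = accept(lsd, NULL, NULL);
 */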
1392static int smc_listen(struct socket *sock, int backlog)
1393{
1394        struct sock *sk = sock->sk;
1395        struct smc_sock *smc;
1396        int rc;
1397
1398        smc = smc_sk(sk);
1399        lock_sock(sk);
1400
1401        rc = -EINVAL;
1402        if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
1403                goto out;
1404
1405        rc = 0;
1406        if (sk->sk_state == SMC_LISTEN) {
1407                sk->sk_max_ack_backlog = backlog;
1408                goto out;
1409        }
 1410        /* some socket options are handled in core, so we cannot apply
 1411         * them to the clc socket -- copy smc socket options to clc socket
1412         */
1413        smc_copy_sock_settings_to_clc(smc);
1414        if (!smc->use_fallback)
1415                tcp_sk(smc->clcsock->sk)->syn_smc = 1;
1416
1417        rc = kernel_listen(smc->clcsock, backlog);
1418        if (rc)
1419                goto out;
1420        sk->sk_max_ack_backlog = backlog;
1421        sk->sk_ack_backlog = 0;
1422        sk->sk_state = SMC_LISTEN;
1423        sock_hold(sk); /* sock_hold in tcp_listen_worker */
1424        if (!schedule_work(&smc->tcp_listen_work))
1425                sock_put(sk);
1426
1427out:
1428        release_sock(sk);
1429        return rc;
1430}
1431
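/* smc_accept() - accept() entry point of AF_SMC
 *
 * Waits (up to sk_rcvtimeo) for a child socket queued by smc_listen_out(),
 * grafts it onto the new struct socket and, if lsmc->sockopt_defer_accept is
 * set on the listener, additionally waits until data has arrived on the
 * child before returning it.
 */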
1432static int smc_accept(struct socket *sock, struct socket *new_sock,
1433                      int flags, bool kern)
1434{
1435        struct sock *sk = sock->sk, *nsk;
1436        DECLARE_WAITQUEUE(wait, current);
1437        struct smc_sock *lsmc;
1438        long timeo;
1439        int rc = 0;
1440
1441        lsmc = smc_sk(sk);
1442        sock_hold(sk); /* sock_put below */
1443        lock_sock(sk);
1444
1445        if (lsmc->sk.sk_state != SMC_LISTEN) {
1446                rc = -EINVAL;
1447                release_sock(sk);
1448                goto out;
1449        }
1450
1451        /* Wait for an incoming connection */
1452        timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
1453        add_wait_queue_exclusive(sk_sleep(sk), &wait);
1454        while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
1455                set_current_state(TASK_INTERRUPTIBLE);
1456                if (!timeo) {
1457                        rc = -EAGAIN;
1458                        break;
1459                }
1460                release_sock(sk);
1461                timeo = schedule_timeout(timeo);
1462                /* wakeup by sk_data_ready in smc_listen_work() */
1463                sched_annotate_sleep();
1464                lock_sock(sk);
1465                if (signal_pending(current)) {
1466                        rc = sock_intr_errno(timeo);
1467                        break;
1468                }
1469        }
1470        set_current_state(TASK_RUNNING);
1471        remove_wait_queue(sk_sleep(sk), &wait);
1472
1473        if (!rc)
1474                rc = sock_error(nsk);
1475        release_sock(sk);
1476        if (rc)
1477                goto out;
1478
1479        if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
1480                /* wait till data arrives on the socket */
1481                timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
1482                                                                MSEC_PER_SEC);
1483                if (smc_sk(nsk)->use_fallback) {
1484                        struct sock *clcsk = smc_sk(nsk)->clcsock->sk;
1485
1486                        lock_sock(clcsk);
1487                        if (skb_queue_empty(&clcsk->sk_receive_queue))
1488                                sk_wait_data(clcsk, &timeo, NULL);
1489                        release_sock(clcsk);
1490                } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
1491                        lock_sock(nsk);
1492                        smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
1493                        release_sock(nsk);
1494                }
1495        }
1496
1497out:
1498        sock_put(sk); /* sock_hold above */
1499        return rc;
1500}
1501
1502static int smc_getname(struct socket *sock, struct sockaddr *addr,
1503                       int peer)
1504{
1505        struct smc_sock *smc;
1506
1507        if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
1508            (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
1509                return -ENOTCONN;
1510
1511        smc = smc_sk(sock->sk);
1512
1513        return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
1514}
1515
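/* smc_sendmsg() - send data on an SMC socket
 *
 * A MSG_FASTOPEN request can only be honored before the SMC handshake has
 * started; it forces a fallback to the TCP clcsock, since SMC has no
 * equivalent to TCP Fast Open. Data is then sent either through the clcsock
 * (fallback) or through smc_tx_sendmsg() into the SMC send buffer.
 */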
1516static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1517{
1518        struct sock *sk = sock->sk;
1519        struct smc_sock *smc;
1520        int rc = -EPIPE;
1521
1522        smc = smc_sk(sk);
1523        lock_sock(sk);
1524        if ((sk->sk_state != SMC_ACTIVE) &&
1525            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1526            (sk->sk_state != SMC_INIT))
1527                goto out;
1528
1529        if (msg->msg_flags & MSG_FASTOPEN) {
1530                if (sk->sk_state == SMC_INIT) {
1531                        smc_switch_to_fallback(smc);
1532                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1533                } else {
1534                        rc = -EINVAL;
1535                        goto out;
1536                }
1537        }
1538
1539        if (smc->use_fallback)
1540                rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
1541        else
1542                rc = smc_tx_sendmsg(smc, msg, len);
1543out:
1544        release_sock(sk);
1545        return rc;
1546}
1547
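/* smc_recvmsg() - receive data from an SMC socket
 *
 * Returns 0 when there is nothing left to read on a closing connection
 * (SMC_CLOSED with RCV_SHUTDOWN, or SMC_PEERFINCLOSEWAIT), -ENOTCONN in the
 * SMC_INIT, SMC_LISTEN and SMC_CLOSED states, and otherwise reads from the
 * clcsock (fallback) or from the RMB via smc_rx_recvmsg().
 */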
1548static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
1549                       int flags)
1550{
1551        struct sock *sk = sock->sk;
1552        struct smc_sock *smc;
1553        int rc = -ENOTCONN;
1554
1555        smc = smc_sk(sk);
1556        lock_sock(sk);
1557        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1558                /* socket was connected before, no more data to read */
1559                rc = 0;
1560                goto out;
1561        }
1562        if ((sk->sk_state == SMC_INIT) ||
1563            (sk->sk_state == SMC_LISTEN) ||
1564            (sk->sk_state == SMC_CLOSED))
1565                goto out;
1566
1567        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1568                rc = 0;
1569                goto out;
1570        }
1571
1572        if (smc->use_fallback) {
1573                rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
1574        } else {
1575                msg->msg_namelen = 0;
1576                rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
1577        }
1578
1579out:
1580        release_sock(sk);
1581        return rc;
1582}
1583
1584static __poll_t smc_accept_poll(struct sock *parent)
1585{
1586        struct smc_sock *isk = smc_sk(parent);
1587        __poll_t mask = 0;
1588
1589        spin_lock(&isk->accept_q_lock);
1590        if (!list_empty(&isk->accept_q))
1591                mask = EPOLLIN | EPOLLRDNORM;
1592        spin_unlock(&isk->accept_q_lock);
1593
1594        return mask;
1595}
1596
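/* smc_poll() - poll/epoll support for SMC sockets
 *
 * Fallback sockets simply delegate to the clcsock's poll routine and mirror
 * its error state. For native SMC sockets the mask is derived from the
 * connection state: listen sockets report EPOLLIN when the accept queue is
 * non-empty, writability follows the available send buffer space, and
 * readability follows bytes_to_rcv, the shutdown flags and pending urgent
 * data.
 */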
1597static __poll_t smc_poll(struct file *file, struct socket *sock,
1598                             poll_table *wait)
1599{
1600        struct sock *sk = sock->sk;
1601        struct smc_sock *smc;
1602        __poll_t mask = 0;
1603
1604        if (!sk)
1605                return EPOLLNVAL;
1606
1607        smc = smc_sk(sock->sk);
1608        if (smc->use_fallback) {
1609                /* delegate to CLC child sock */
1610                mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
1611                sk->sk_err = smc->clcsock->sk->sk_err;
1612        } else {
1613                if (sk->sk_state != SMC_CLOSED)
1614                        sock_poll_wait(file, sock, wait);
1615                if (sk->sk_err)
1616                        mask |= EPOLLERR;
1617                if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
1618                    (sk->sk_state == SMC_CLOSED))
1619                        mask |= EPOLLHUP;
1620                if (sk->sk_state == SMC_LISTEN) {
1621                        /* woken up by sk_data_ready in smc_listen_work() */
1622                        mask |= smc_accept_poll(sk);
1623                } else if (smc->use_fallback) { /* as a result of connect_work() */
1624                        mask |= smc->clcsock->ops->poll(file, smc->clcsock,
1625                                                           wait);
1626                        sk->sk_err = smc->clcsock->sk->sk_err;
1627                } else {
1628                        if ((sk->sk_state != SMC_INIT &&
1629                             atomic_read(&smc->conn.sndbuf_space)) ||
1630                            sk->sk_shutdown & SEND_SHUTDOWN) {
1631                                mask |= EPOLLOUT | EPOLLWRNORM;
1632                        } else {
1633                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1634                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1635                        }
1636                        if (atomic_read(&smc->conn.bytes_to_rcv))
1637                                mask |= EPOLLIN | EPOLLRDNORM;
1638                        if (sk->sk_shutdown & RCV_SHUTDOWN)
1639                                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
1640                        if (sk->sk_state == SMC_APPCLOSEWAIT1)
1641                                mask |= EPOLLIN;
1642                        if (smc->conn.urg_state == SMC_URG_VALID)
1643                                mask |= EPOLLPRI;
1644                }
1645        }
1646
1647        return mask;
1648}
1649
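/* smc_shutdown() - shut down one or both directions of an SMC socket
 *
 * Fallback sockets delegate to kernel_sock_shutdown() on the clcsock. For
 * native SMC sockets the SMC close protocol is run first (smc_close_active()
 * for SHUT_RDWR, smc_close_shutdown_write() for SHUT_WR, nothing for
 * SHUT_RD), and the clcsock is shut down as well so the TCP side follows.
 */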
1650static int smc_shutdown(struct socket *sock, int how)
1651{
1652        struct sock *sk = sock->sk;
1653        struct smc_sock *smc;
1654        int rc = -EINVAL;
1655        int rc1 = 0;
1656
1657        smc = smc_sk(sk);
1658
1659        if ((how < SHUT_RD) || (how > SHUT_RDWR))
1660                return rc;
1661
1662        lock_sock(sk);
1663
1664        rc = -ENOTCONN;
1665        if ((sk->sk_state != SMC_ACTIVE) &&
1666            (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
1667            (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
1668            (sk->sk_state != SMC_APPCLOSEWAIT1) &&
1669            (sk->sk_state != SMC_APPCLOSEWAIT2) &&
1670            (sk->sk_state != SMC_APPFINCLOSEWAIT))
1671                goto out;
1672        if (smc->use_fallback) {
1673                rc = kernel_sock_shutdown(smc->clcsock, how);
1674                sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
1675                if (sk->sk_shutdown == SHUTDOWN_MASK)
1676                        sk->sk_state = SMC_CLOSED;
1677                goto out;
1678        }
1679        switch (how) {
1680        case SHUT_RDWR:         /* shutdown in both directions */
1681                rc = smc_close_active(smc);
1682                break;
1683        case SHUT_WR:
1684                rc = smc_close_shutdown_write(smc);
1685                break;
1686        case SHUT_RD:
1687                rc = 0;
1688                /* nothing more to do because peer is not involved */
1689                break;
1690        }
1691        if (smc->clcsock)
1692                rc1 = kernel_sock_shutdown(smc->clcsock, how);
1693        /* map sock_shutdown_cmd constants to sk_shutdown value range */
1694        sk->sk_shutdown |= how + 1;
1695
1696out:
1697        release_sock(sk);
1698        return rc ? rc : rc1;
1699}
1700
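/* smc_setsockopt() - set socket options on an SMC socket
 *
 * Every option is first forwarded to the internal clcsock so that a later
 * fallback to TCP behaves as if the option had been set on a TCP socket.
 * TCP_ULP and the TCP Fast Open family of options cannot be supported on an
 * SMC connection; they force a fallback while the socket is still in
 * SMC_INIT and are rejected afterwards. TCP_NODELAY and TCP_CORK kick the
 * transmit worker, and TCP_DEFER_ACCEPT is remembered for smc_accept().
 */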
1701static int smc_setsockopt(struct socket *sock, int level, int optname,
1702                          char __user *optval, unsigned int optlen)
1703{
1704        struct sock *sk = sock->sk;
1705        struct smc_sock *smc;
1706        int val, rc;
1707
1708        smc = smc_sk(sk);
1709
1710        /* generic setsockopts reaching us here always apply to the
1711         * CLC socket
1712         */
1713        rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1714                                           optval, optlen);
1715        if (smc->clcsock->sk->sk_err) {
1716                sk->sk_err = smc->clcsock->sk->sk_err;
1717                sk->sk_error_report(sk);
1718        }
1719        if (rc)
1720                return rc;
1721
1722        if (optlen < sizeof(int))
1723                return -EINVAL;
1724        if (get_user(val, (int __user *)optval))
1725                return -EFAULT;
1726
1727        lock_sock(sk);
1728        switch (optname) {
1729        case TCP_ULP:
1730        case TCP_FASTOPEN:
1731        case TCP_FASTOPEN_CONNECT:
1732        case TCP_FASTOPEN_KEY:
1733        case TCP_FASTOPEN_NO_COOKIE:
1734                /* option not supported by SMC */
1735                if (sk->sk_state == SMC_INIT) {
1736                        smc_switch_to_fallback(smc);
1737                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1738                } else {
1739                        if (!smc->use_fallback)
1740                                rc = -EINVAL;
1741                }
1742                break;
1743        case TCP_NODELAY:
1744                if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1745                        if (val && !smc->use_fallback)
1746                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1747                                                 0);
1748                }
1749                break;
1750        case TCP_CORK:
1751                if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
1752                        if (!val && !smc->use_fallback)
1753                                mod_delayed_work(system_wq, &smc->conn.tx_work,
1754                                                 0);
1755                }
1756                break;
1757        case TCP_DEFER_ACCEPT:
1758                smc->sockopt_defer_accept = val;
1759                break;
1760        default:
1761                break;
1762        }
1763        release_sock(sk);
1764
1765        return rc;
1766}
1767
1768static int smc_getsockopt(struct socket *sock, int level, int optname,
1769                          char __user *optval, int __user *optlen)
1770{
1771        struct smc_sock *smc;
1772
1773        smc = smc_sk(sock->sk);
1774        /* socket options apply to the CLC socket */
1775        return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1776                                             optval, optlen);
1777}
1778
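/* smc_ioctl() - ioctl support for SMC sockets
 *
 * Fallback sockets pass every request to the clcsock. For native SMC
 * sockets SIOCINQ, SIOCOUTQ, SIOCOUTQNSD and SIOCATMARK are answered from
 * the SMC connection (receive counter, send buffer fill level, unsent data,
 * urgent data cursor); all other requests return -ENOIOCTLCMD.
 */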
1779static int smc_ioctl(struct socket *sock, unsigned int cmd,
1780                     unsigned long arg)
1781{
1782        union smc_host_cursor cons, urg;
1783        struct smc_connection *conn;
1784        struct smc_sock *smc;
1785        int answ;
1786
1787        smc = smc_sk(sock->sk);
1788        conn = &smc->conn;
1789        lock_sock(&smc->sk);
1790        if (smc->use_fallback) {
1791                if (!smc->clcsock) {
1792                        release_sock(&smc->sk);
1793                        return -EBADF;
1794                }
1795                answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1796                release_sock(&smc->sk);
1797                return answ;
1798        }
1799        switch (cmd) {
1800        case SIOCINQ: /* same as FIONREAD */
1801                if (smc->sk.sk_state == SMC_LISTEN) {
1802                        release_sock(&smc->sk);
1803                        return -EINVAL;
1804                }
1805                if (smc->sk.sk_state == SMC_INIT ||
1806                    smc->sk.sk_state == SMC_CLOSED)
1807                        answ = 0;
1808                else
1809                        answ = atomic_read(&smc->conn.bytes_to_rcv);
1810                break;
1811        case SIOCOUTQ:
1812                /* output queue size (not sent + not acked) */
1813                if (smc->sk.sk_state == SMC_LISTEN) {
1814                        release_sock(&smc->sk);
1815                        return -EINVAL;
1816                }
1817                if (smc->sk.sk_state == SMC_INIT ||
1818                    smc->sk.sk_state == SMC_CLOSED)
1819                        answ = 0;
1820                else
1821                        answ = smc->conn.sndbuf_desc->len -
1822                                        atomic_read(&smc->conn.sndbuf_space);
1823                break;
1824        case SIOCOUTQNSD:
1825                /* output queue size (unsent data only) */
1826                if (smc->sk.sk_state == SMC_LISTEN) {
1827                        release_sock(&smc->sk);
1828                        return -EINVAL;
1829                }
1830                if (smc->sk.sk_state == SMC_INIT ||
1831                    smc->sk.sk_state == SMC_CLOSED)
1832                        answ = 0;
1833                else
1834                        answ = smc_tx_prepared_sends(&smc->conn);
1835                break;
1836        case SIOCATMARK:
1837                if (smc->sk.sk_state == SMC_LISTEN) {
1838                        release_sock(&smc->sk);
1839                        return -EINVAL;
1840                }
1841                if (smc->sk.sk_state == SMC_INIT ||
1842                    smc->sk.sk_state == SMC_CLOSED) {
1843                        answ = 0;
1844                } else {
1845                        smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1846                        smc_curs_copy(&urg, &conn->urg_curs, conn);
1847                        answ = smc_curs_diff(conn->rmb_desc->len,
1848                                             &cons, &urg) == 1;
1849                }
1850                break;
1851        default:
1852                release_sock(&smc->sk);
1853                return -ENOIOCTLCMD;
1854        }
1855        release_sock(&smc->sk);
1856
1857        return put_user(answ, (int __user *)arg);
1858}
1859
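/* smc_sendpage() - sendpage support for SMC sockets
 *
 * Zero-copy sendpage is only available through the clcsock in fallback
 * mode; on a native SMC connection the data takes the regular
 * sock_no_sendpage() path, which copies into the send buffer via sendmsg.
 */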
1860static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1861                            int offset, size_t size, int flags)
1862{
1863        struct sock *sk = sock->sk;
1864        struct smc_sock *smc;
1865        int rc = -EPIPE;
1866
1867        smc = smc_sk(sk);
1868        lock_sock(sk);
1869        if (sk->sk_state != SMC_ACTIVE) {
1870                release_sock(sk);
1871                goto out;
1872        }
1873        release_sock(sk);
1874        if (smc->use_fallback)
1875                rc = kernel_sendpage(smc->clcsock, page, offset,
1876                                     size, flags);
1877        else
1878                rc = sock_no_sendpage(sock, page, offset, size, flags);
1879
1880out:
1881        return rc;
1882}
1883
1884/* Map the affected portions of the rmbe into an spd, note the number of bytes
1885 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
1886 * are delayed until the respective page has been fully processed.
1887 * Note that subsequent recv() calls have to wait until all splice() processing
1888 * is completed.
1889 */
1890static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1891                               struct pipe_inode_info *pipe, size_t len,
1892                               unsigned int flags)
1893{
1894        struct sock *sk = sock->sk;
1895        struct smc_sock *smc;
1896        int rc = -ENOTCONN;
1897
1898        smc = smc_sk(sk);
1899        lock_sock(sk);
1900        if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1901                /* socket was connected before, no more data to read */
1902                rc = 0;
1903                goto out;
1904        }
1905        if (sk->sk_state == SMC_INIT ||
1906            sk->sk_state == SMC_LISTEN ||
1907            sk->sk_state == SMC_CLOSED)
1908                goto out;
1909
1910        if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1911                rc = 0;
1912                goto out;
1913        }
1914
1915        if (smc->use_fallback) {
1916                rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1917                                                    pipe, len, flags);
1918        } else {
1919                if (*ppos) {
1920                        rc = -ESPIPE;
1921                        goto out;
1922                }
1923                if (flags & SPLICE_F_NONBLOCK)
1924                        flags = MSG_DONTWAIT;
1925                else
1926                        flags = 0;
1927                rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
1928        }
1929out:
1930        release_sock(sk);
1931
1932        return rc;
1933}
1934
1935/* proto_ops of the AF_SMC socket family: must look like tcp */
1936static const struct proto_ops smc_sock_ops = {
1937        .family         = PF_SMC,
1938        .owner          = THIS_MODULE,
1939        .release        = smc_release,
1940        .bind           = smc_bind,
1941        .connect        = smc_connect,
1942        .socketpair     = sock_no_socketpair,
1943        .accept         = smc_accept,
1944        .getname        = smc_getname,
1945        .poll           = smc_poll,
1946        .ioctl          = smc_ioctl,
1947        .listen         = smc_listen,
1948        .shutdown       = smc_shutdown,
1949        .setsockopt     = smc_setsockopt,
1950        .getsockopt     = smc_getsockopt,
1951        .sendmsg        = smc_sendmsg,
1952        .recvmsg        = smc_recvmsg,
1953        .mmap           = sock_no_mmap,
1954        .sendpage       = smc_sendpage,
1955        .splice_read    = smc_splice_read,
1956};
1957
1958static int smc_create(struct net *net, struct socket *sock, int protocol,
1959                      int kern)
1960{
1961        int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
1962        struct smc_sock *smc;
1963        struct sock *sk;
1964        int rc;
1965
1966        rc = -ESOCKTNOSUPPORT;
1967        if (sock->type != SOCK_STREAM)
1968                goto out;
1969
1970        rc = -EPROTONOSUPPORT;
1971        if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
1972                goto out;
1973
1974        rc = -ENOBUFS;
1975        sock->ops = &smc_sock_ops;
1976        sk = smc_sock_alloc(net, sock, protocol);
1977        if (!sk)
1978                goto out;
1979
1980        /* create internal TCP socket for CLC handshake and fallback */
1981        smc = smc_sk(sk);
1982        smc->use_fallback = false; /* assume rdma capability first */
1983        smc->fallback_rsn = 0;
1984        rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1985                              &smc->clcsock);
1986        if (rc) {
1987                sk_common_release(sk);
1988                goto out;
1989        }
1990        smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1991        smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
1992
1993out:
1994        return rc;
1995}
1996
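/* Registering smc_sock_family_ops below makes smc_create() the handler for
 * socket(AF_SMC, ...) calls. As a rough userspace sketch (not part of this
 * file; it assumes AF_SMC is 43 as in linux/socket.h and SMCPROTO_SMC /
 * SMCPROTO_SMC6 are 0 / 1 as in smc.h), an SMC socket is obtained with:
 *
 *	#include <sys/socket.h>
 *
 *	#ifndef AF_SMC
 *	#define AF_SMC		43
 *	#endif
 *	#define SMCPROTO_SMC	0	// IPv4 addressing
 *	#define SMCPROTO_SMC6	1	// IPv6 addressing
 *
 *	int main(void)
 *	{
 *		int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *
 *		return fd < 0 ? 1 : 0;
 *	}
 *
 * The returned descriptor is used like a TCP socket (bind/connect/listen
 * with sockaddr_in or sockaddr_in6); smc_create() rejects any type other
 * than SOCK_STREAM and any protocol other than the two above.
 */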
1997static const struct net_proto_family smc_sock_family_ops = {
1998        .family = PF_SMC,
1999        .owner  = THIS_MODULE,
2000        .create = smc_create,
2001};
2002
2003unsigned int smc_net_id;
2004
2005static __net_init int smc_net_init(struct net *net)
2006{
2007        return smc_pnet_net_init(net);
2008}
2009
2010static void __net_exit smc_net_exit(struct net *net)
2011{
2012        smc_pnet_net_exit(net);
2013}
2014
2015static struct pernet_operations smc_net_ops = {
2016        .init = smc_net_init,
2017        .exit = smc_net_exit,
2018        .id   = &smc_net_id,
2019        .size = sizeof(struct smc_net),
2020};
2021
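/* smc_init() - module initialization
 *
 * Registration order matters: the pernet subsystem and pnet table come
 * first, then the LLC and CDC message handlers, the IPv4 and IPv6 protos,
 * the AF_SMC socket family and the IB client. Only after all of that has
 * succeeded is the tcp_have_smc static branch enabled, so TCP starts
 * processing SMC options; the error path unwinds in reverse order.
 */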
2022static int __init smc_init(void)
2023{
2024        int rc;
2025
2026        rc = register_pernet_subsys(&smc_net_ops);
2027        if (rc)
2028                return rc;
2029
2030        rc = smc_pnet_init();
2031        if (rc)
2032                goto out_pernet_subsys;
2033
2034        rc = smc_llc_init();
2035        if (rc) {
2036                pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
2037                goto out_pnet;
2038        }
2039
2040        rc = smc_cdc_init();
2041        if (rc) {
2042                pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
2043                goto out_pnet;
2044        }
2045
2046        rc = proto_register(&smc_proto, 1);
2047        if (rc) {
2048                pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
2049                goto out_pnet;
2050        }
2051
2052        rc = proto_register(&smc_proto6, 1);
2053        if (rc) {
2054                pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
2055                goto out_proto;
2056        }
2057
2058        rc = sock_register(&smc_sock_family_ops);
2059        if (rc) {
2060                pr_err("%s: sock_register fails with %d\n", __func__, rc);
2061                goto out_proto6;
2062        }
2063        INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
2064        INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
2065
2066        rc = smc_ib_register_client();
2067        if (rc) {
2068                pr_err("%s: ib_register fails with %d\n", __func__, rc);
2069                goto out_sock;
2070        }
2071
2072        static_branch_enable(&tcp_have_smc);
2073        return 0;
2074
2075out_sock:
2076        sock_unregister(PF_SMC);
2077out_proto6:
2078        proto_unregister(&smc_proto6);
2079out_proto:
2080        proto_unregister(&smc_proto);
2081out_pnet:
2082        smc_pnet_exit();
2083out_pernet_subsys:
2084        unregister_pernet_subsys(&smc_net_ops);
2085
2086        return rc;
2087}
2088
2089static void __exit smc_exit(void)
2090{
2091        smc_core_exit();
2092        static_branch_disable(&tcp_have_smc);
2093        smc_ib_unregister_client();
2094        sock_unregister(PF_SMC);
2095        proto_unregister(&smc_proto6);
2096        proto_unregister(&smc_proto);
2097        smc_pnet_exit();
2098        unregister_pernet_subsys(&smc_net_ops);
2099}
2100
2101module_init(smc_init);
2102module_exit(smc_exit);
2103
2104MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
2105MODULE_DESCRIPTION("smc socket address family");
2106MODULE_LICENSE("GPL");
2107MODULE_ALIAS_NETPROTO(PF_SMC);
2108