linux/net/smc/smc_clc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
   4 *
   5 *  CLC (connection layer control) handshake over initial TCP socket to
   6 *  prepare for RDMA traffic
   7 *
   8 *  Copyright IBM Corp. 2016
   9 *
  10 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
  11 */
  12
  13#include <linux/in.h>
  14#include <linux/if_ether.h>
  15#include <linux/sched/signal.h>
  16
  17#include <net/sock.h>
  18#include <net/tcp.h>
  19
  20#include "smc.h"
  21#include "smc_core.h"
  22#include "smc_clc.h"
  23#include "smc_ib.h"
  24
  25/* check if received message has a correct header length and contains valid
  26 * heading and trailing eyecatchers
  27 */
  28static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
  29{
  30        struct smc_clc_msg_proposal_prefix *pclc_prfx;
  31        struct smc_clc_msg_accept_confirm *clc;
  32        struct smc_clc_msg_proposal *pclc;
  33        struct smc_clc_msg_decline *dclc;
  34        struct smc_clc_msg_trail *trl;
  35
  36        if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
  37                return false;
  38        switch (clcm->type) {
  39        case SMC_CLC_PROPOSAL:
  40                pclc = (struct smc_clc_msg_proposal *)clcm;
  41                pclc_prfx = smc_clc_proposal_get_prefix(pclc);
  42                if (ntohs(pclc->hdr.length) !=
  43                        sizeof(*pclc) + ntohs(pclc->iparea_offset) +
  44                        sizeof(*pclc_prfx) +
  45                        pclc_prfx->ipv6_prefixes_cnt *
  46                                sizeof(struct smc_clc_ipv6_prefix) +
  47                        sizeof(*trl))
  48                        return false;
  49                trl = (struct smc_clc_msg_trail *)
  50                        ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
  51                break;
  52        case SMC_CLC_ACCEPT:
  53        case SMC_CLC_CONFIRM:
  54                clc = (struct smc_clc_msg_accept_confirm *)clcm;
  55                if (ntohs(clc->hdr.length) != sizeof(*clc))
  56                        return false;
  57                trl = &clc->trl;
  58                break;
  59        case SMC_CLC_DECLINE:
  60                dclc = (struct smc_clc_msg_decline *)clcm;
  61                if (ntohs(dclc->hdr.length) != sizeof(*dclc))
  62                        return false;
  63                trl = &dclc->trl;
  64                break;
  65        default:
  66                return false;
  67        }
  68        if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
  69                return false;
  70        return true;
  71}
  72
  73/* Wait for data on the tcp-socket, analyze received data
  74 * Returns:
  75 * 0 if success and it was not a decline that we received.
  76 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
  77 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
  78 */
  79int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
  80                     u8 expected_type)
  81{
  82        struct sock *clc_sk = smc->clcsock->sk;
  83        struct smc_clc_msg_hdr *clcm = buf;
  84        struct msghdr msg = {NULL, 0};
  85        int reason_code = 0;
  86        struct kvec vec = {buf, buflen};
  87        int len, datlen;
  88        int krflags;
  89
  90        /* peek the first few bytes to determine length of data to receive
  91         * so we don't consume any subsequent CLC message or payload data
  92         * in the TCP byte stream
  93         */
  94        /*
  95         * Caller must make sure that buflen is no less than
  96         * sizeof(struct smc_clc_msg_hdr)
  97         */
  98        krflags = MSG_PEEK | MSG_WAITALL;
  99        smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 100        iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
 101                        sizeof(struct smc_clc_msg_hdr));
 102        len = sock_recvmsg(smc->clcsock, &msg, krflags);
 103        if (signal_pending(current)) {
 104                reason_code = -EINTR;
 105                clc_sk->sk_err = EINTR;
 106                smc->sk.sk_err = EINTR;
 107                goto out;
 108        }
 109        if (clc_sk->sk_err) {
 110                reason_code = -clc_sk->sk_err;
 111                smc->sk.sk_err = clc_sk->sk_err;
 112                goto out;
 113        }
 114        if (!len) { /* peer has performed orderly shutdown */
 115                smc->sk.sk_err = ECONNRESET;
 116                reason_code = -ECONNRESET;
 117                goto out;
 118        }
 119        if (len < 0) {
 120                smc->sk.sk_err = -len;
 121                reason_code = len;
 122                goto out;
 123        }
 124        datlen = ntohs(clcm->length);
 125        if ((len < sizeof(struct smc_clc_msg_hdr)) ||
 126            (datlen > buflen) ||
 127            ((clcm->type != SMC_CLC_DECLINE) &&
 128             (clcm->type != expected_type))) {
 129                smc->sk.sk_err = EPROTO;
 130                reason_code = -EPROTO;
 131                goto out;
 132        }
 133
 134        /* receive the complete CLC message */
 135        memset(&msg, 0, sizeof(struct msghdr));
 136        iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
 137        krflags = MSG_WAITALL;
 138        smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 139        len = sock_recvmsg(smc->clcsock, &msg, krflags);
 140        if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
 141                smc->sk.sk_err = EPROTO;
 142                reason_code = -EPROTO;
 143                goto out;
 144        }
 145        if (clcm->type == SMC_CLC_DECLINE) {
 146                reason_code = SMC_CLC_DECL_REPLY;
 147                if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 148                        smc->conn.lgr->sync_err = true;
 149                        smc_lgr_terminate(smc->conn.lgr);
 150                }
 151        }
 152
 153out:
 154        return reason_code;
 155}
 156
 157/* send CLC DECLINE message across internal TCP socket */
 158int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 159{
 160        struct smc_clc_msg_decline dclc;
 161        struct msghdr msg;
 162        struct kvec vec;
 163        int len;
 164
 165        memset(&dclc, 0, sizeof(dclc));
 166        memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 167        dclc.hdr.type = SMC_CLC_DECLINE;
 168        dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
 169        dclc.hdr.version = SMC_CLC_V1;
 170        dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0;
 171        memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
 172        dclc.peer_diagnosis = htonl(peer_diag_info);
 173        memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 174
 175        memset(&msg, 0, sizeof(msg));
 176        vec.iov_base = &dclc;
 177        vec.iov_len = sizeof(struct smc_clc_msg_decline);
 178        len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
 179                             sizeof(struct smc_clc_msg_decline));
 180        if (len < sizeof(struct smc_clc_msg_decline))
 181                smc->sk.sk_err = EPROTO;
 182        if (len < 0)
 183                smc->sk.sk_err = -len;
 184        return sock_error(&smc->sk);
 185}
 186
 187/* send CLC PROPOSAL message across internal TCP socket */
 188int smc_clc_send_proposal(struct smc_sock *smc,
 189                          struct smc_ib_device *smcibdev,
 190                          u8 ibport)
 191{
 192        struct smc_clc_msg_proposal_prefix pclc_prfx;
 193        struct smc_clc_msg_proposal pclc;
 194        struct smc_clc_msg_trail trl;
 195        int reason_code = 0;
 196        struct kvec vec[3];
 197        struct msghdr msg;
 198        int len, plen, rc;
 199
 200        /* send SMC Proposal CLC message */
 201        plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
 202        memset(&pclc, 0, sizeof(pclc));
 203        memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 204        pclc.hdr.type = SMC_CLC_PROPOSAL;
 205        pclc.hdr.length = htons(plen);
 206        pclc.hdr.version = SMC_CLC_V1;          /* SMC version */
 207        memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
 208        memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
 209        memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
 210        pclc.iparea_offset = htons(0);
 211
 212        memset(&pclc_prfx, 0, sizeof(pclc_prfx));
 213        /* determine subnet and mask from internal TCP socket */
 214        rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
 215                                  &pclc_prfx.prefix_len);
 216        if (rc)
 217                return SMC_CLC_DECL_CNFERR; /* configuration error */
 218        pclc_prfx.ipv6_prefixes_cnt = 0;
 219        memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 220        memset(&msg, 0, sizeof(msg));
 221        vec[0].iov_base = &pclc;
 222        vec[0].iov_len = sizeof(pclc);
 223        vec[1].iov_base = &pclc_prfx;
 224        vec[1].iov_len = sizeof(pclc_prfx);
 225        vec[2].iov_base = &trl;
 226        vec[2].iov_len = sizeof(trl);
 227        /* due to the few bytes needed for clc-handshake this cannot block */
 228        len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
 229        if (len < sizeof(pclc)) {
 230                if (len >= 0) {
 231                        reason_code = -ENETUNREACH;
 232                        smc->sk.sk_err = -reason_code;
 233                } else {
 234                        smc->sk.sk_err = smc->clcsock->sk->sk_err;
 235                        reason_code = -smc->sk.sk_err;
 236                }
 237        }
 238
 239        return reason_code;
 240}
 241
 242/* send CLC CONFIRM message across internal TCP socket */
 243int smc_clc_send_confirm(struct smc_sock *smc)
 244{
 245        struct smc_connection *conn = &smc->conn;
 246        struct smc_clc_msg_accept_confirm cclc;
 247        struct smc_link *link;
 248        int reason_code = 0;
 249        struct msghdr msg;
 250        struct kvec vec;
 251        int len;
 252
 253        link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 254        /* send SMC Confirm CLC msg */
 255        memset(&cclc, 0, sizeof(cclc));
 256        memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 257        cclc.hdr.type = SMC_CLC_CONFIRM;
 258        cclc.hdr.length = htons(sizeof(cclc));
 259        cclc.hdr.version = SMC_CLC_V1;          /* SMC version */
 260        memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
 261        memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
 262               SMC_GID_SIZE);
 263        memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 264        hton24(cclc.qpn, link->roce_qp->qp_num);
 265        cclc.rmb_rkey =
 266                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 267        cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
 268        cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 269        cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
 270        cclc.rmbe_size = conn->rmbe_size_short;
 271        cclc.rmb_dma_addr = cpu_to_be64(
 272                (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 273        hton24(cclc.psn, link->psn_initial);
 274
 275        memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 276
 277        memset(&msg, 0, sizeof(msg));
 278        vec.iov_base = &cclc;
 279        vec.iov_len = sizeof(cclc);
 280        len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
 281        if (len < sizeof(cclc)) {
 282                if (len >= 0) {
 283                        reason_code = -ENETUNREACH;
 284                        smc->sk.sk_err = -reason_code;
 285                } else {
 286                        smc->sk.sk_err = smc->clcsock->sk->sk_err;
 287                        reason_code = -smc->sk.sk_err;
 288                }
 289        }
 290        return reason_code;
 291}
 292
 293/* send CLC ACCEPT message across internal TCP socket */
 294int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 295{
 296        struct smc_connection *conn = &new_smc->conn;
 297        struct smc_clc_msg_accept_confirm aclc;
 298        struct smc_link *link;
 299        struct msghdr msg;
 300        struct kvec vec;
 301        int rc = 0;
 302        int len;
 303
 304        link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 305        memset(&aclc, 0, sizeof(aclc));
 306        memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 307        aclc.hdr.type = SMC_CLC_ACCEPT;
 308        aclc.hdr.length = htons(sizeof(aclc));
 309        aclc.hdr.version = SMC_CLC_V1;          /* SMC version */
 310        if (srv_first_contact)
 311                aclc.hdr.flag = 1;
 312        memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
 313        memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
 314               SMC_GID_SIZE);
 315        memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
 316        hton24(aclc.qpn, link->roce_qp->qp_num);
 317        aclc.rmb_rkey =
 318                htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
 319        aclc.conn_idx = 1;                      /* as long as 1 RMB = 1 RMBE */
 320        aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 321        aclc.qp_mtu = link->path_mtu;
 322        aclc.rmbe_size = conn->rmbe_size_short,
 323        aclc.rmb_dma_addr = cpu_to_be64(
 324                (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
 325        hton24(aclc.psn, link->psn_initial);
 326        memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
 327
 328        memset(&msg, 0, sizeof(msg));
 329        vec.iov_base = &aclc;
 330        vec.iov_len = sizeof(aclc);
 331        len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
 332        if (len < sizeof(aclc)) {
 333                if (len >= 0)
 334                        new_smc->sk.sk_err = EPROTO;
 335                else
 336                        new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
 337                rc = sock_error(&new_smc->sk);
 338        }
 339
 340        return rc;
 341}
 342