linux/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
   3 *
   4 * Copyright (c) 2012, Intel Corporation.
   5 *
   6 *   Author: Zach Brown <zab@zabbo.net>
   7 *   Author: Peter J. Braam <braam@clusterfs.com>
   8 *   Author: Phil Schwan <phil@clusterfs.com>
   9 *   Author: Eric Barton <eric@bartonsoftware.com>
  10 *
  11 *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
  12 *
  13 *   Portals is free software; you can redistribute it and/or
  14 *   modify it under the terms of version 2 of the GNU General Public
  15 *   License as published by the Free Software Foundation.
  16 *
  17 *   Portals is distributed in the hope that it will be useful,
  18 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 *   GNU General Public License for more details.
  21 *
  22 *   You should have received a copy of the GNU General Public License
  23 *   along with Portals; if not, write to the Free Software
  24 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25 */
  26
  27#include "socklnd.h"
  28
  29/*
  30 * Protocol entries :
  31 *   pro_send_hello       : send hello message
  32 *   pro_recv_hello       : receive hello message
  33 *   pro_pack        : pack message header
  34 *   pro_unpack    : unpack message header
  35 *   pro_queue_tx_zcack() : Called holding BH lock: kss_lock
  36 *                        return 1 if ACK is piggybacked, otherwise return 0
  37 *   pro_queue_tx_msg()   : Called holding BH lock: kss_lock
  38 *                        return the NOOP ZC-ACK tx replaced by my message, or NULL
  39 *   pro_handle_zcreq()   : handler of incoming ZC-REQ
  40 *   pro_handle_zcack()   : handler of incoming ZC-ACK
  41 *   pro_match_tx()       : Called holding glock
  42 */
  43
  44static ksock_tx_t *
  45ksocknal_queue_tx_msg_v1(ksock_conn_t *conn, ksock_tx_t *tx_msg)
  46{
  47        /* V1.x, just enqueue it */
  48        list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
  49        return NULL;
  50}
  51
  52void
  53ksocknal_next_tx_carrier(ksock_conn_t *conn)
  54{
  55        ksock_tx_t     *tx = conn->ksnc_tx_carrier;
  56
  57        /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
  58        LASSERT (!list_empty(&conn->ksnc_tx_queue));
  59        LASSERT (tx != NULL);
  60
  61        /* Next TX that can carry ZC-ACK or LNet message */
  62        if (tx->tx_list.next == &conn->ksnc_tx_queue) {
  63                /* no more packets queued */
  64                conn->ksnc_tx_carrier = NULL;
  65        } else {
  66                conn->ksnc_tx_carrier = list_entry(tx->tx_list.next,
  67                                                       ksock_tx_t, tx_list);
  68                LASSERT (conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
  69        }
  70}
  71
  72static int
  73ksocknal_queue_tx_zcack_v2(ksock_conn_t *conn,
  74                           ksock_tx_t *tx_ack, __u64 cookie)
  75{
  76        ksock_tx_t *tx = conn->ksnc_tx_carrier;
  77
  78        LASSERT (tx_ack == NULL ||
  79                 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
  80
  81        /*
  82         * Enqueue or piggyback tx_ack / cookie
  83         * . no tx can piggyback cookie of tx_ack (or cookie), just
  84         *   enqueue the tx_ack (if tx_ack != NUL) and return NULL.
  85         * . There is tx can piggyback cookie of tx_ack (or cookie),
  86         *   piggyback the cookie and return the tx.
  87         */
  88        if (tx == NULL) {
  89                if (tx_ack != NULL) {
  90                        list_add_tail(&tx_ack->tx_list,
  91                                          &conn->ksnc_tx_queue);
  92                        conn->ksnc_tx_carrier = tx_ack;
  93                }
  94                return 0;
  95        }
  96
  97        if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
  98                /* tx is noop zc-ack, can't piggyback zc-ack cookie */
  99                if (tx_ack != NULL)
 100                        list_add_tail(&tx_ack->tx_list,
 101                                          &conn->ksnc_tx_queue);
 102                return 0;
 103        }
 104
 105        LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
 106        LASSERT(tx->tx_msg.ksm_zc_cookies[1] == 0);
 107
 108        if (tx_ack != NULL)
 109                cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
 110
 111        /* piggyback the zc-ack cookie */
 112        tx->tx_msg.ksm_zc_cookies[1] = cookie;
 113        /* move on to the next TX which can carry cookie */
 114        ksocknal_next_tx_carrier(conn);
 115
 116        return 1;
 117}
 118
 119static ksock_tx_t *
 120ksocknal_queue_tx_msg_v2(ksock_conn_t *conn, ksock_tx_t *tx_msg)
 121{
 122        ksock_tx_t  *tx  = conn->ksnc_tx_carrier;
 123
 124        /*
 125         * Enqueue tx_msg:
 126         * . If there is no NOOP on the connection, just enqueue
 127         *   tx_msg and return NULL
 128         * . If there is NOOP on the connection, piggyback the cookie
 129         *   and replace the NOOP tx, and return the NOOP tx.
 130         */
 131        if (tx == NULL) { /* nothing on queue */
 132                list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
 133                conn->ksnc_tx_carrier = tx_msg;
 134                return NULL;
 135        }
 136
 137        if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
 138                list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
 139                return NULL;
 140        }
 141
 142        LASSERT (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
 143
 144        /* There is a noop zc-ack can be piggybacked */
 145        tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
 146        ksocknal_next_tx_carrier(conn);
 147
 148        /* use new_tx to replace the noop zc-ack packet */
 149        list_add(&tx_msg->tx_list, &tx->tx_list);
 150        list_del(&tx->tx_list);
 151
 152        return tx;
 153}
 154
 155static int
 156ksocknal_queue_tx_zcack_v3(ksock_conn_t *conn,
 157                           ksock_tx_t *tx_ack, __u64 cookie)
 158{
 159        ksock_tx_t *tx;
 160
 161        if (conn->ksnc_type != SOCKLND_CONN_ACK)
 162                return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);
 163
 164        /* non-blocking ZC-ACK (to router) */
 165        LASSERT (tx_ack == NULL ||
 166                 tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);
 167
 168        if ((tx = conn->ksnc_tx_carrier) == NULL) {
 169                if (tx_ack != NULL) {
 170                        list_add_tail(&tx_ack->tx_list,
 171                                          &conn->ksnc_tx_queue);
 172                        conn->ksnc_tx_carrier = tx_ack;
 173                }
 174                return 0;
 175        }
 176
 177        /* conn->ksnc_tx_carrier != NULL */
 178
 179        if (tx_ack != NULL)
 180                cookie = tx_ack->tx_msg.ksm_zc_cookies[1];
 181
 182        if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
 183                return 1;
 184
 185        if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
 186                /* replace the keepalive PING with a real ACK */
 187                LASSERT (tx->tx_msg.ksm_zc_cookies[0] == 0);
 188                tx->tx_msg.ksm_zc_cookies[1] = cookie;
 189                return 1;
 190        }
 191
 192        if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
 193            cookie == tx->tx_msg.ksm_zc_cookies[1]) {
 194                CWARN("%s: duplicated ZC cookie: "LPU64"\n",
 195                      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
 196                return 1; /* XXX return error in the future */
 197        }
 198
 199        if (tx->tx_msg.ksm_zc_cookies[0] == 0) {
 200                /* NOOP tx has only one ZC-ACK cookie, can carry at least one more */
 201                if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
 202                        tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
 203                        tx->tx_msg.ksm_zc_cookies[1] = cookie;
 204                } else {
 205                        tx->tx_msg.ksm_zc_cookies[0] = cookie;
 206                }
 207
 208                if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
 209                        /* not likely to carry more ACKs, skip it to simplify logic */
 210                        ksocknal_next_tx_carrier(conn);
 211                }
 212
 213                return 1;
 214        }
 215
 216        /* takes two or more cookies already */
 217
 218        if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
 219                __u64   tmp = 0;
 220
 221                /* two separated cookies: (a+2, a) or (a+1, a) */
 222                LASSERT (tx->tx_msg.ksm_zc_cookies[0] -
 223                         tx->tx_msg.ksm_zc_cookies[1] <= 2);
 224
 225                if (tx->tx_msg.ksm_zc_cookies[0] -
 226                    tx->tx_msg.ksm_zc_cookies[1] == 2) {
 227                        if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
 228                                tmp = cookie;
 229                } else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
 230                        tmp = tx->tx_msg.ksm_zc_cookies[1];
 231                } else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
 232                        tmp = tx->tx_msg.ksm_zc_cookies[0];
 233                }
 234
 235                if (tmp != 0) {
 236                        /* range of cookies */
 237                        tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
 238                        tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
 239                        return 1;
 240                }
 241
 242        } else {
 243                /* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
 244                if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
 245                    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
 246                        CWARN("%s: duplicated ZC cookie: "LPU64"\n",
 247                              libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
 248                        return 1; /* XXX: return error in the future */
 249                }
 250
 251                if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
 252                        tx->tx_msg.ksm_zc_cookies[1] = cookie;
 253                        return 1;
 254                }
 255
 256                if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
 257                        tx->tx_msg.ksm_zc_cookies[0] = cookie;
 258                        return 1;
 259                }
 260        }
 261
 262        /* failed to piggyback ZC-ACK */
 263        if (tx_ack != NULL) {
 264                list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
 265                /* the next tx can piggyback at least 1 ACK */
 266                ksocknal_next_tx_carrier(conn);
 267        }
 268
 269        return 0;
 270}
 271
 272static int
 273ksocknal_match_tx(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
 274{
 275        int nob;
 276
 277#if SOCKNAL_VERSION_DEBUG
 278        if (!*ksocknal_tunables.ksnd_typed_conns)
 279                return SOCKNAL_MATCH_YES;
 280#endif
 281
 282        if (tx == NULL || tx->tx_lnetmsg == NULL) {
 283                /* noop packet */
 284                nob = offsetof(ksock_msg_t, ksm_u);
 285        } else {
 286                nob = tx->tx_lnetmsg->msg_len +
 287                      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
 288                       sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
 289        }
 290
 291        /* default checking for typed connection */
 292        switch (conn->ksnc_type) {
 293        default:
 294                CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
 295                LBUG();
 296        case SOCKLND_CONN_ANY:
 297                return SOCKNAL_MATCH_YES;
 298
 299        case SOCKLND_CONN_BULK_IN:
 300                return SOCKNAL_MATCH_MAY;
 301
 302        case SOCKLND_CONN_BULK_OUT:
 303                if (nob < *ksocknal_tunables.ksnd_min_bulk)
 304                        return SOCKNAL_MATCH_MAY;
 305                else
 306                        return SOCKNAL_MATCH_YES;
 307
 308        case SOCKLND_CONN_CONTROL:
 309                if (nob >= *ksocknal_tunables.ksnd_min_bulk)
 310                        return SOCKNAL_MATCH_MAY;
 311                else
 312                        return SOCKNAL_MATCH_YES;
 313        }
 314}
 315
 316static int
 317ksocknal_match_tx_v3(ksock_conn_t *conn, ksock_tx_t *tx, int nonblk)
 318{
 319        int nob;
 320
 321        if (tx == NULL || tx->tx_lnetmsg == NULL)
 322                nob = offsetof(ksock_msg_t, ksm_u);
 323        else
 324                nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
 325
 326        switch (conn->ksnc_type) {
 327        default:
 328                CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
 329                LBUG();
 330        case SOCKLND_CONN_ANY:
 331                return SOCKNAL_MATCH_NO;
 332
 333        case SOCKLND_CONN_ACK:
 334                if (nonblk)
 335                        return SOCKNAL_MATCH_YES;
 336                else if (tx == NULL || tx->tx_lnetmsg == NULL)
 337                        return SOCKNAL_MATCH_MAY;
 338                else
 339                        return SOCKNAL_MATCH_NO;
 340
 341        case SOCKLND_CONN_BULK_OUT:
 342                if (nonblk)
 343                        return SOCKNAL_MATCH_NO;
 344                else if (nob < *ksocknal_tunables.ksnd_min_bulk)
 345                        return SOCKNAL_MATCH_MAY;
 346                else
 347                        return SOCKNAL_MATCH_YES;
 348
 349        case SOCKLND_CONN_CONTROL:
 350                if (nonblk)
 351                        return SOCKNAL_MATCH_NO;
 352                else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
 353                        return SOCKNAL_MATCH_MAY;
 354                else
 355                        return SOCKNAL_MATCH_YES;
 356        }
 357}
 358
 359/* (Sink) handle incoming ZC request from sender */
 360static int
 361ksocknal_handle_zcreq(ksock_conn_t *c, __u64 cookie, int remote)
 362{
 363        ksock_peer_t   *peer = c->ksnc_peer;
 364        ksock_conn_t   *conn;
 365        ksock_tx_t     *tx;
 366        int          rc;
 367
 368        read_lock(&ksocknal_data.ksnd_global_lock);
 369
 370        conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
 371        if (conn != NULL) {
 372                ksock_sched_t *sched = conn->ksnc_scheduler;
 373
 374                LASSERT(conn->ksnc_proto->pro_queue_tx_zcack != NULL);
 375
 376                spin_lock_bh(&sched->kss_lock);
 377
 378                rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);
 379
 380                spin_unlock_bh(&sched->kss_lock);
 381
 382                if (rc) { /* piggybacked */
 383                        read_unlock(&ksocknal_data.ksnd_global_lock);
 384                        return 0;
 385                }
 386        }
 387
 388        read_unlock(&ksocknal_data.ksnd_global_lock);
 389
 390        /* ACK connection is not ready, or can't piggyback the ACK */
 391        tx = ksocknal_alloc_tx_noop(cookie, !!remote);
 392        if (tx == NULL)
 393                return -ENOMEM;
 394
 395        if ((rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id)) == 0)
 396                return 0;
 397
 398        ksocknal_free_tx(tx);
 399        return rc;
 400}
 401
 402/* (Sender) handle ZC_ACK from sink */
 403static int
 404ksocknal_handle_zcack(ksock_conn_t *conn, __u64 cookie1, __u64 cookie2)
 405{
 406        ksock_peer_t      *peer = conn->ksnc_peer;
 407        ksock_tx_t      *tx;
 408        ksock_tx_t      *tmp;
 409        LIST_HEAD     (zlist);
 410        int             count;
 411
 412        if (cookie1 == 0)
 413                cookie1 = cookie2;
 414
 415        count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);
 416
 417        if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
 418            conn->ksnc_proto == &ksocknal_protocol_v3x) {
 419                /* keepalive PING for V3.x, just ignore it */
 420                return count == 1 ? 0 : -EPROTO;
 421        }
 422
 423        spin_lock(&peer->ksnp_lock);
 424
 425        list_for_each_entry_safe(tx, tmp,
 426                                     &peer->ksnp_zc_req_list, tx_zc_list) {
 427                __u64 c = tx->tx_msg.ksm_zc_cookies[0];
 428
 429                if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
 430                        tx->tx_msg.ksm_zc_cookies[0] = 0;
 431                        list_del(&tx->tx_zc_list);
 432                        list_add(&tx->tx_zc_list, &zlist);
 433
 434                        if (--count == 0)
 435                                break;
 436                }
 437        }
 438
 439        spin_unlock(&peer->ksnp_lock);
 440
 441        while (!list_empty(&zlist)) {
 442                tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);
 443                list_del(&tx->tx_zc_list);
 444                ksocknal_tx_decref(tx);
 445        }
 446
 447        return count == 0 ? 0 : -EPROTO;
 448}
 449
 450static int
 451ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
 452{
 453        socket_t        *sock = conn->ksnc_sock;
 454        lnet_hdr_t        *hdr;
 455        lnet_magicversion_t *hmv;
 456        int               rc;
 457        int               i;
 458
 459        CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));
 460
 461        LIBCFS_ALLOC(hdr, sizeof(*hdr));
 462        if (hdr == NULL) {
 463                CERROR("Can't allocate lnet_hdr_t\n");
 464                return -ENOMEM;
 465        }
 466
 467        hmv = (lnet_magicversion_t *)&hdr->dest_nid;
 468
 469        /* Re-organize V2.x message header to V1.x (lnet_hdr_t)
 470         * header and send out */
 471        hmv->magic       = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
 472        hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR);
 473        hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR);
 474
 475        if (the_lnet.ln_testprotocompat != 0) {
 476                /* single-shot proto check */
 477                LNET_LOCK();
 478                if ((the_lnet.ln_testprotocompat & 1) != 0) {
 479                        hmv->version_major++;   /* just different! */
 480                        the_lnet.ln_testprotocompat &= ~1;
 481                }
 482                if ((the_lnet.ln_testprotocompat & 2) != 0) {
 483                        hmv->magic = LNET_PROTO_MAGIC;
 484                        the_lnet.ln_testprotocompat &= ~2;
 485                }
 486                LNET_UNLOCK();
 487        }
 488
 489        hdr->src_nid    = cpu_to_le64 (hello->kshm_src_nid);
 490        hdr->src_pid    = cpu_to_le32 (hello->kshm_src_pid);
 491        hdr->type          = cpu_to_le32 (LNET_MSG_HELLO);
 492        hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32));
 493        hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype);
 494        hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation);
 495
 496        rc = libcfs_sock_write(sock, hdr, sizeof(*hdr),lnet_acceptor_timeout());
 497
 498        if (rc != 0) {
 499                CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
 500                        rc, &conn->ksnc_ipaddr, conn->ksnc_port);
 501                goto out;
 502        }
 503
 504        if (hello->kshm_nips == 0)
 505                goto out;
 506
 507        for (i = 0; i < (int) hello->kshm_nips; i++) {
 508                hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]);
 509        }
 510
 511        rc = libcfs_sock_write(sock, hello->kshm_ips,
 512                               hello->kshm_nips * sizeof(__u32),
 513                               lnet_acceptor_timeout());
 514        if (rc != 0) {
 515                CNETERR("Error %d sending HELLO payload (%d)"
 516                        " to %pI4h/%d\n", rc, hello->kshm_nips,
 517                        &conn->ksnc_ipaddr, conn->ksnc_port);
 518        }
 519out:
 520        LIBCFS_FREE(hdr, sizeof(*hdr));
 521
 522        return rc;
 523}
 524
 525static int
 526ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello)
 527{
 528        socket_t   *sock = conn->ksnc_sock;
 529        int          rc;
 530
 531        hello->kshm_magic   = LNET_PROTO_MAGIC;
 532        hello->kshm_version = conn->ksnc_proto->pro_version;
 533
 534        if (the_lnet.ln_testprotocompat != 0) {
 535                /* single-shot proto check */
 536                LNET_LOCK();
 537                if ((the_lnet.ln_testprotocompat & 1) != 0) {
 538                        hello->kshm_version++;   /* just different! */
 539                        the_lnet.ln_testprotocompat &= ~1;
 540                }
 541                LNET_UNLOCK();
 542        }
 543
 544        rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
 545                               lnet_acceptor_timeout());
 546
 547        if (rc != 0) {
 548                CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
 549                        rc, &conn->ksnc_ipaddr, conn->ksnc_port);
 550                return rc;
 551        }
 552
 553        if (hello->kshm_nips == 0)
 554                return 0;
 555
 556        rc = libcfs_sock_write(sock, hello->kshm_ips,
 557                               hello->kshm_nips * sizeof(__u32),
 558                               lnet_acceptor_timeout());
 559        if (rc != 0) {
 560                CNETERR("Error %d sending HELLO payload (%d)"
 561                        " to %pI4h/%d\n", rc, hello->kshm_nips,
 562                        &conn->ksnc_ipaddr, conn->ksnc_port);
 563        }
 564
 565        return rc;
 566}
 567
 568static int
 569ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout)
 570{
 571        socket_t        *sock = conn->ksnc_sock;
 572        lnet_hdr_t        *hdr;
 573        int               rc;
 574        int               i;
 575
 576        LIBCFS_ALLOC(hdr, sizeof(*hdr));
 577        if (hdr == NULL) {
 578                CERROR("Can't allocate lnet_hdr_t\n");
 579                return -ENOMEM;
 580        }
 581
 582        rc = libcfs_sock_read(sock, &hdr->src_nid,
 583                              sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid),
 584                              timeout);
 585        if (rc != 0) {
 586                CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
 587                        rc, &conn->ksnc_ipaddr);
 588                LASSERT (rc < 0 && rc != -EALREADY);
 589                goto out;
 590        }
 591
 592        /* ...and check we got what we expected */
 593        if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) {
 594                CERROR("Expecting a HELLO hdr,"
 595                        " but got type %d from %pI4h\n",
 596                        le32_to_cpu (hdr->type),
 597                        &conn->ksnc_ipaddr);
 598                rc = -EPROTO;
 599                goto out;
 600        }
 601
 602        hello->kshm_src_nid      = le64_to_cpu (hdr->src_nid);
 603        hello->kshm_src_pid      = le32_to_cpu (hdr->src_pid);
 604        hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation);
 605        hello->kshm_ctype          = le32_to_cpu (hdr->msg.hello.type);
 606        hello->kshm_nips            = le32_to_cpu (hdr->payload_length) /
 607                                         sizeof (__u32);
 608
 609        if (hello->kshm_nips > LNET_MAX_INTERFACES) {
 610                CERROR("Bad nips %d from ip %pI4h\n",
 611                       hello->kshm_nips, &conn->ksnc_ipaddr);
 612                rc = -EPROTO;
 613                goto out;
 614        }
 615
 616        if (hello->kshm_nips == 0)
 617                goto out;
 618
 619        rc = libcfs_sock_read(sock, hello->kshm_ips,
 620                              hello->kshm_nips * sizeof(__u32), timeout);
 621        if (rc != 0) {
 622                CERROR("Error %d reading IPs from ip %pI4h\n",
 623                        rc, &conn->ksnc_ipaddr);
 624                LASSERT(rc < 0 && rc != -EALREADY);
 625                goto out;
 626        }
 627
 628        for (i = 0; i < (int) hello->kshm_nips; i++) {
 629                hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);
 630
 631                if (hello->kshm_ips[i] == 0) {
 632                        CERROR("Zero IP[%d] from ip %pI4h\n",
 633                               i, &conn->ksnc_ipaddr);
 634                        rc = -EPROTO;
 635                        break;
 636                }
 637        }
 638out:
 639        LIBCFS_FREE(hdr, sizeof(*hdr));
 640
 641        return rc;
 642}
 643
 644static int
 645ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout)
 646{
 647        socket_t      *sock = conn->ksnc_sock;
 648        int             rc;
 649        int             i;
 650
 651        if (hello->kshm_magic == LNET_PROTO_MAGIC)
 652                conn->ksnc_flip = 0;
 653        else
 654                conn->ksnc_flip = 1;
 655
 656        rc = libcfs_sock_read(sock, &hello->kshm_src_nid,
 657                              offsetof(ksock_hello_msg_t, kshm_ips) -
 658                                       offsetof(ksock_hello_msg_t, kshm_src_nid),
 659                              timeout);
 660        if (rc != 0) {
 661                CERROR("Error %d reading HELLO from %pI4h\n",
 662                        rc, &conn->ksnc_ipaddr);
 663                LASSERT(rc < 0 && rc != -EALREADY);
 664                return rc;
 665        }
 666
 667        if (conn->ksnc_flip) {
 668                __swab32s(&hello->kshm_src_pid);
 669                __swab64s(&hello->kshm_src_nid);
 670                __swab32s(&hello->kshm_dst_pid);
 671                __swab64s(&hello->kshm_dst_nid);
 672                __swab64s(&hello->kshm_src_incarnation);
 673                __swab64s(&hello->kshm_dst_incarnation);
 674                __swab32s(&hello->kshm_ctype);
 675                __swab32s(&hello->kshm_nips);
 676        }
 677
 678        if (hello->kshm_nips > LNET_MAX_INTERFACES) {
 679                CERROR("Bad nips %d from ip %pI4h\n",
 680                       hello->kshm_nips, &conn->ksnc_ipaddr);
 681                return -EPROTO;
 682        }
 683
 684        if (hello->kshm_nips == 0)
 685                return 0;
 686
 687        rc = libcfs_sock_read(sock, hello->kshm_ips,
 688                              hello->kshm_nips * sizeof(__u32), timeout);
 689        if (rc != 0) {
 690                CERROR("Error %d reading IPs from ip %pI4h\n",
 691                        rc, &conn->ksnc_ipaddr);
 692                LASSERT(rc < 0 && rc != -EALREADY);
 693                return rc;
 694        }
 695
 696        for (i = 0; i < (int) hello->kshm_nips; i++) {
 697                if (conn->ksnc_flip)
 698                        __swab32s(&hello->kshm_ips[i]);
 699
 700                if (hello->kshm_ips[i] == 0) {
 701                        CERROR("Zero IP[%d] from ip %pI4h\n",
 702                               i, &conn->ksnc_ipaddr);
 703                        return -EPROTO;
 704                }
 705        }
 706
 707        return 0;
 708}
 709
 710static void
 711ksocknal_pack_msg_v1(ksock_tx_t *tx)
 712{
 713        /* V1.x has no KSOCK_MSG_NOOP */
 714        LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
 715        LASSERT(tx->tx_lnetmsg != NULL);
 716
 717        tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr;
 718        tx->tx_iov[0].iov_len  = sizeof(lnet_hdr_t);
 719
 720        tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
 721}
 722
 723static void
 724ksocknal_pack_msg_v2(ksock_tx_t *tx)
 725{
 726        tx->tx_iov[0].iov_base = (void *)&tx->tx_msg;
 727
 728        if (tx->tx_lnetmsg != NULL) {
 729                LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
 730
 731                tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
 732                tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
 733                tx->tx_resid = tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
 734        } else {
 735                LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
 736
 737                tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
 738                tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_hdr);
 739        }
 740        /* Don't checksum before start sending, because packet can be piggybacked with ACK */
 741}
 742
 743static void
 744ksocknal_unpack_msg_v1(ksock_msg_t *msg)
 745{
 746        msg->ksm_csum      = 0;
 747        msg->ksm_type      = KSOCK_MSG_LNET;
 748        msg->ksm_zc_cookies[0]  = msg->ksm_zc_cookies[1]  = 0;
 749}
 750
 751static void
 752ksocknal_unpack_msg_v2(ksock_msg_t *msg)
 753{
 754        return;  /* Do nothing */
 755}
 756
 757ksock_proto_t  ksocknal_protocol_v1x =
 758{
 759        .pro_version        = KSOCK_PROTO_V1,
 760        .pro_send_hello  = ksocknal_send_hello_v1,
 761        .pro_recv_hello  = ksocknal_recv_hello_v1,
 762        .pro_pack              = ksocknal_pack_msg_v1,
 763        .pro_unpack          = ksocknal_unpack_msg_v1,
 764        .pro_queue_tx_msg       = ksocknal_queue_tx_msg_v1,
 765        .pro_handle_zcreq       = NULL,
 766        .pro_handle_zcack       = NULL,
 767        .pro_queue_tx_zcack     = NULL,
 768        .pro_match_tx      = ksocknal_match_tx
 769};
 770
 771ksock_proto_t  ksocknal_protocol_v2x =
 772{
 773        .pro_version        = KSOCK_PROTO_V2,
 774        .pro_send_hello  = ksocknal_send_hello_v2,
 775        .pro_recv_hello  = ksocknal_recv_hello_v2,
 776        .pro_pack              = ksocknal_pack_msg_v2,
 777        .pro_unpack          = ksocknal_unpack_msg_v2,
 778        .pro_queue_tx_msg       = ksocknal_queue_tx_msg_v2,
 779        .pro_queue_tx_zcack     = ksocknal_queue_tx_zcack_v2,
 780        .pro_handle_zcreq       = ksocknal_handle_zcreq,
 781        .pro_handle_zcack       = ksocknal_handle_zcack,
 782        .pro_match_tx      = ksocknal_match_tx
 783};
 784
 785ksock_proto_t  ksocknal_protocol_v3x =
 786{
 787        .pro_version        = KSOCK_PROTO_V3,
 788        .pro_send_hello  = ksocknal_send_hello_v2,
 789        .pro_recv_hello  = ksocknal_recv_hello_v2,
 790        .pro_pack              = ksocknal_pack_msg_v2,
 791        .pro_unpack          = ksocknal_unpack_msg_v2,
 792        .pro_queue_tx_msg       = ksocknal_queue_tx_msg_v2,
 793        .pro_queue_tx_zcack     = ksocknal_queue_tx_zcack_v3,
 794        .pro_handle_zcreq       = ksocknal_handle_zcreq,
 795        .pro_handle_zcack       = ksocknal_handle_zcack,
 796        .pro_match_tx      = ksocknal_match_tx_v3
 797};
 798