/* linux/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_proto.c */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 *   Author: Zach Brown <zab@zabbo.net>
 *   Author: Peter J. Braam <braam@clusterfs.com>
 *   Author: Phil Schwan <phil@clusterfs.com>
 *   Author: Eric Barton <eric@bartonsoftware.com>
 *
 *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
 *
 *   Portals is free software; you can redistribute it and/or
 *   modify it under the terms of version 2 of the GNU General Public
 *   License as published by the Free Software Foundation.
 *
 *   Portals is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 */

#include "socklnd.h"

/*
 * Protocol entries :
 *   pro_send_hello()     : send hello message
 *   pro_recv_hello()     : receive hello message
 *   pro_pack()           : pack message header
 *   pro_unpack()         : unpack message header
 *   pro_queue_tx_zcack() : Called holding BH lock: kss_lock
 *                          return 1 if ACK is piggybacked, otherwise return 0
 *   pro_queue_tx_msg()   : Called holding BH lock: kss_lock
 *                          return the ACK that piggybacked by my message, or NULL
 *   pro_handle_zcreq()   : handler of incoming ZC-REQ
 *   pro_handle_zcack()   : handler of incoming ZC-ACK
 *   pro_match_tx()       : Called holding glock
 */

static struct ksock_tx *
ksocknal_queue_tx_msg_v1(struct ksock_conn *conn, struct ksock_tx *tx_msg)
{
	/*
	 * V1.x has no ZC-ACK piggybacking, so just append the message to
	 * the connection's send queue.  Always returns NULL: no NOOP tx
	 * can ever be displaced by the new message under V1.x.
	 */
	list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
	return NULL;
}
  48
  49void
  50ksocknal_next_tx_carrier(struct ksock_conn *conn)
  51{
  52        struct ksock_tx *tx = conn->ksnc_tx_carrier;
  53
  54        /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */
  55        LASSERT(!list_empty(&conn->ksnc_tx_queue));
  56        LASSERT(tx);
  57
  58        /* Next TX that can carry ZC-ACK or LNet message */
  59        if (tx->tx_list.next == &conn->ksnc_tx_queue) {
  60                /* no more packets queued */
  61                conn->ksnc_tx_carrier = NULL;
  62        } else {
  63                conn->ksnc_tx_carrier = list_next_entry(tx, tx_list);
  64                LASSERT(conn->ksnc_tx_carrier->tx_msg.ksm_type == tx->tx_msg.ksm_type);
  65        }
  66}
  67
static int
ksocknal_queue_tx_zcack_v2(struct ksock_conn *conn,
			   struct ksock_tx *tx_ack, __u64 cookie)
{
	struct ksock_tx *tx = conn->ksnc_tx_carrier;

	/* tx_ack, if given, must be a NOOP carrying a ZC-ACK cookie */
	LASSERT(!tx_ack ||
		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	/*
	 * Enqueue or piggyback tx_ack / cookie:
	 * . if no queued tx can piggyback the cookie of tx_ack (or cookie),
	 *   just enqueue the tx_ack (if tx_ack != NULL) and return 0.
	 * . if a queued tx can piggyback the cookie of tx_ack (or cookie),
	 *   piggyback the cookie and return 1.
	 */
	if (!tx) {
		/* no carrier: the NOOP (if any) becomes the new carrier */
		if (tx_ack) {
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) {
		/* tx is noop zc-ack, can't piggyback zc-ack cookie */
		if (tx_ack)
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
		return 0;
	}

	/* carrier is an LNet message with a free cookie slot */
	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET);
	LASSERT(!tx->tx_msg.ksm_zc_cookies[1]);

	if (tx_ack)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	/* piggyback the zc-ack cookie */
	tx->tx_msg.ksm_zc_cookies[1] = cookie;
	/* move on to the next TX which can carry cookie */
	ksocknal_next_tx_carrier(conn);

	return 1;
}
 114
static struct ksock_tx *
ksocknal_queue_tx_msg_v2(struct ksock_conn *conn, struct ksock_tx *tx_msg)
{
	struct ksock_tx *tx  = conn->ksnc_tx_carrier;

	/*
	 * Enqueue tx_msg:
	 * . If there is no NOOP on the connection, just enqueue
	 *   tx_msg and return NULL
	 * . If there is a NOOP on the connection, piggyback its cookie
	 *   on tx_msg, replace the NOOP in the queue, and return the
	 *   NOOP tx (caller disposes of it).
	 */
	if (!tx) { /* nothing on queue */
		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
		conn->ksnc_tx_carrier = tx_msg;
		return NULL;
	}

	if (tx->tx_msg.ksm_type == KSOCK_MSG_LNET) { /* nothing to carry */
		list_add_tail(&tx_msg->tx_list, &conn->ksnc_tx_queue);
		return NULL;
	}

	LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	/* There is a noop zc-ack can be piggybacked */
	tx_msg->tx_msg.ksm_zc_cookies[1] = tx->tx_msg.ksm_zc_cookies[1];
	ksocknal_next_tx_carrier(conn);

	/*
	 * use new_tx to replace the noop zc-ack packet: insert tx_msg
	 * right after tx, then unlink tx, preserving queue position
	 */
	list_add(&tx_msg->tx_list, &tx->tx_list);
	list_del(&tx->tx_list);

	return tx;
}
 150
static int
ksocknal_queue_tx_zcack_v3(struct ksock_conn *conn,
			   struct ksock_tx *tx_ack, __u64 cookie)
{
	struct ksock_tx *tx;

	/* typed conns: only the ACK connection carries non-blocking ZC-ACKs */
	if (conn->ksnc_type != SOCKLND_CONN_ACK)
		return ksocknal_queue_tx_zcack_v2(conn, tx_ack, cookie);

	/* non-blocking ZC-ACK (to router) */
	LASSERT(!tx_ack ||
		tx_ack->tx_msg.ksm_type == KSOCK_MSG_NOOP);

	tx = conn->ksnc_tx_carrier;
	if (!tx) {
		/* no carrier: enqueue the NOOP (if any) as the new carrier */
		if (tx_ack) {
			list_add_tail(&tx_ack->tx_list,
				      &conn->ksnc_tx_queue);
			conn->ksnc_tx_carrier = tx_ack;
		}
		return 0;
	}

	/* conn->ksnc_tx_carrier != NULL: try folding the cookie into it */

	if (tx_ack)
		cookie = tx_ack->tx_msg.ksm_zc_cookies[1];

	if (cookie == SOCKNAL_KEEPALIVE_PING) /* ignore keepalive PING */
		return 1;

	if (tx->tx_msg.ksm_zc_cookies[1] == SOCKNAL_KEEPALIVE_PING) {
		/* replace the keepalive PING with a real ACK */
		LASSERT(!tx->tx_msg.ksm_zc_cookies[0]);
		tx->tx_msg.ksm_zc_cookies[1] = cookie;
		return 1;
	}

	if (cookie == tx->tx_msg.ksm_zc_cookies[0] ||
	    cookie == tx->tx_msg.ksm_zc_cookies[1]) {
		CWARN("%s: duplicated ZC cookie: %llu\n",
		      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
		return 1; /* XXX return error in the future */
	}

	if (!tx->tx_msg.ksm_zc_cookies[0]) {
		/*
		 * NOOP tx has only one ZC-ACK cookie, can carry at least
		 * one more.  A cookie pair is kept ordered as
		 * (larger, smaller) in cookies[0]/cookies[1].
		 */
		if (tx->tx_msg.ksm_zc_cookies[1] > cookie) {
			tx->tx_msg.ksm_zc_cookies[0] = tx->tx_msg.ksm_zc_cookies[1];
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
		} else {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
		}

		if (tx->tx_msg.ksm_zc_cookies[0] - tx->tx_msg.ksm_zc_cookies[1] > 2) {
			/* not likely to carry more ACKs, skip it to simplify logic */
			ksocknal_next_tx_carrier(conn);
		}

		return 1;
	}

	/* takes two or more cookies already */

	if (tx->tx_msg.ksm_zc_cookies[0] > tx->tx_msg.ksm_zc_cookies[1]) {
		__u64 tmp = 0;

		/* two separated cookies: (a+2, a) or (a+1, a) */
		LASSERT(tx->tx_msg.ksm_zc_cookies[0] -
			 tx->tx_msg.ksm_zc_cookies[1] <= 2);

		if (tx->tx_msg.ksm_zc_cookies[0] -
		    tx->tx_msg.ksm_zc_cookies[1] == 2) {
			/* (a+2, a): only a+1 fits, completing the run */
			if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1)
				tmp = cookie;
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[1] - 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[1];
		} else if (cookie == tx->tx_msg.ksm_zc_cookies[0] + 1) {
			tmp = tx->tx_msg.ksm_zc_cookies[0];
		}

		if (tmp) {
			/*
			 * switch to range form: [tmp - 1, tmp + 1],
			 * stored as cookies[0] < cookies[1]
			 */
			tx->tx_msg.ksm_zc_cookies[0] = tmp - 1;
			tx->tx_msg.ksm_zc_cookies[1] = tmp + 1;
			return 1;
		}

	} else {
		/* ksm_zc_cookies[0] < ksm_zc_cookies[1], it is range of cookies */
		if (cookie >= tx->tx_msg.ksm_zc_cookies[0] &&
		    cookie <= tx->tx_msg.ksm_zc_cookies[1]) {
			CWARN("%s: duplicated ZC cookie: %llu\n",
			      libcfs_id2str(conn->ksnc_peer->ksnp_id), cookie);
			return 1; /* XXX: return error in the future */
		}

		/* extend the range by one at either end */
		if (cookie == tx->tx_msg.ksm_zc_cookies[1] + 1) {
			tx->tx_msg.ksm_zc_cookies[1] = cookie;
			return 1;
		}

		if (cookie == tx->tx_msg.ksm_zc_cookies[0] - 1) {
			tx->tx_msg.ksm_zc_cookies[0] = cookie;
			return 1;
		}
	}

	/* failed to piggyback ZC-ACK */
	if (tx_ack) {
		list_add_tail(&tx_ack->tx_list, &conn->ksnc_tx_queue);
		/* the next tx can piggyback at least 1 ACK */
		ksocknal_next_tx_carrier(conn);
	}

	return 0;
}
 268
 269static int
 270ksocknal_match_tx(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
 271{
 272        int nob;
 273
 274#if SOCKNAL_VERSION_DEBUG
 275        if (!*ksocknal_tunables.ksnd_typed_conns)
 276                return SOCKNAL_MATCH_YES;
 277#endif
 278
 279        if (!tx || !tx->tx_lnetmsg) {
 280                /* noop packet */
 281                nob = offsetof(ksock_msg_t, ksm_u);
 282        } else {
 283                nob = tx->tx_lnetmsg->msg_len +
 284                      ((conn->ksnc_proto == &ksocknal_protocol_v1x) ?
 285                       sizeof(lnet_hdr_t) : sizeof(ksock_msg_t));
 286        }
 287
 288        /* default checking for typed connection */
 289        switch (conn->ksnc_type) {
 290        default:
 291                CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
 292                LBUG();
 293        case SOCKLND_CONN_ANY:
 294                return SOCKNAL_MATCH_YES;
 295
 296        case SOCKLND_CONN_BULK_IN:
 297                return SOCKNAL_MATCH_MAY;
 298
 299        case SOCKLND_CONN_BULK_OUT:
 300                if (nob < *ksocknal_tunables.ksnd_min_bulk)
 301                        return SOCKNAL_MATCH_MAY;
 302                else
 303                        return SOCKNAL_MATCH_YES;
 304
 305        case SOCKLND_CONN_CONTROL:
 306                if (nob >= *ksocknal_tunables.ksnd_min_bulk)
 307                        return SOCKNAL_MATCH_MAY;
 308                else
 309                        return SOCKNAL_MATCH_YES;
 310        }
 311}
 312
 313static int
 314ksocknal_match_tx_v3(struct ksock_conn *conn, struct ksock_tx *tx, int nonblk)
 315{
 316        int nob;
 317
 318        if (!tx || !tx->tx_lnetmsg)
 319                nob = offsetof(ksock_msg_t, ksm_u);
 320        else
 321                nob = tx->tx_lnetmsg->msg_len + sizeof(ksock_msg_t);
 322
 323        switch (conn->ksnc_type) {
 324        default:
 325                CERROR("ksnc_type bad: %u\n", conn->ksnc_type);
 326                LBUG();
 327        case SOCKLND_CONN_ANY:
 328                return SOCKNAL_MATCH_NO;
 329
 330        case SOCKLND_CONN_ACK:
 331                if (nonblk)
 332                        return SOCKNAL_MATCH_YES;
 333                else if (!tx || !tx->tx_lnetmsg)
 334                        return SOCKNAL_MATCH_MAY;
 335                else
 336                        return SOCKNAL_MATCH_NO;
 337
 338        case SOCKLND_CONN_BULK_OUT:
 339                if (nonblk)
 340                        return SOCKNAL_MATCH_NO;
 341                else if (nob < *ksocknal_tunables.ksnd_min_bulk)
 342                        return SOCKNAL_MATCH_MAY;
 343                else
 344                        return SOCKNAL_MATCH_YES;
 345
 346        case SOCKLND_CONN_CONTROL:
 347                if (nonblk)
 348                        return SOCKNAL_MATCH_NO;
 349                else if (nob >= *ksocknal_tunables.ksnd_min_bulk)
 350                        return SOCKNAL_MATCH_MAY;
 351                else
 352                        return SOCKNAL_MATCH_YES;
 353        }
 354}
 355
/*
 * (Sink) handle incoming ZC request from sender: acknowledge @cookie,
 * preferably by piggybacking it on a queued tx; otherwise launch a
 * standalone NOOP carrying it.  Returns 0 on success or -errno.
 */
static int
ksocknal_handle_zcreq(struct ksock_conn *c, __u64 cookie, int remote)
{
	struct ksock_peer *peer = c->ksnc_peer;
	struct ksock_conn *conn;
	struct ksock_tx *tx;
	int rc;

	read_lock(&ksocknal_data.ksnd_global_lock);

	conn = ksocknal_find_conn_locked(peer, NULL, !!remote);
	if (conn) {
		struct ksock_sched *sched = conn->ksnc_scheduler;

		LASSERT(conn->ksnc_proto->pro_queue_tx_zcack);

		/* kss_lock required by pro_queue_tx_zcack() */
		spin_lock_bh(&sched->kss_lock);

		rc = conn->ksnc_proto->pro_queue_tx_zcack(conn, NULL, cookie);

		spin_unlock_bh(&sched->kss_lock);

		if (rc) { /* piggybacked */
			read_unlock(&ksocknal_data.ksnd_global_lock);
			return 0;
		}
	}

	read_unlock(&ksocknal_data.ksnd_global_lock);

	/* ACK connection is not ready, or can't piggyback the ACK */
	tx = ksocknal_alloc_tx_noop(cookie, !!remote);
	if (!tx)
		return -ENOMEM;

	rc = ksocknal_launch_packet(peer->ksnp_ni, tx, peer->ksnp_id);
	if (!rc)
		return 0;

	/* launch failed: tx was never queued, free it here */
	ksocknal_free_tx(tx);
	return rc;
}
 399
/*
 * (Sender) handle ZC_ACK from sink: release the zero-copy references on
 * every pending ZC request whose cookie is acknowledged.
 *
 * Cookie encoding (see ksocknal_queue_tx_zcack_v3): cookie1 > cookie2
 * names exactly the two endpoint cookies; cookie1 <= cookie2 is the
 * inclusive range [cookie1, cookie2]; cookie1 == 0 means just cookie2.
 * Returns 0 if every expected cookie was found, -EPROTO otherwise.
 */
static int
ksocknal_handle_zcack(struct ksock_conn *conn, __u64 cookie1, __u64 cookie2)
{
	struct ksock_peer *peer = conn->ksnc_peer;
	struct ksock_tx *tx;
	struct ksock_tx *temp;
	struct ksock_tx *tmp;
	LIST_HEAD(zlist);
	int count;

	if (!cookie1)
		cookie1 = cookie2;

	count = (cookie1 > cookie2) ? 2 : (cookie2 - cookie1 + 1);

	if (cookie2 == SOCKNAL_KEEPALIVE_PING &&
	    conn->ksnc_proto == &ksocknal_protocol_v3x) {
		/* keepalive PING for V3.x, just ignore it */
		return count == 1 ? 0 : -EPROTO;
	}

	spin_lock(&peer->ksnp_lock);

	/* collect acknowledged requests on a private list under the lock */
	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list,
				 tx_zc_list) {
		__u64 c = tx->tx_msg.ksm_zc_cookies[0];

		if (c == cookie1 || c == cookie2 || (cookie1 < c && c < cookie2)) {
			tx->tx_msg.ksm_zc_cookies[0] = 0;
			list_del(&tx->tx_zc_list);
			list_add(&tx->tx_zc_list, &zlist);

			if (!--count)
				break;
		}
	}

	spin_unlock(&peer->ksnp_lock);

	/* drop the ZC references outside the peer lock */
	list_for_each_entry_safe(tx, temp, &zlist, tx_zc_list) {
		list_del(&tx->tx_zc_list);
		ksocknal_tx_decref(tx);
	}

	return !count ? 0 : -EPROTO;
}
 447
/*
 * Send a HELLO handshake in V1.x wire format: re-pack the generic
 * hello into a little-endian lnet_hdr_t followed by the IP list.
 * Returns 0 on success or a negative errno from the socket write.
 */
static int
ksocknal_send_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello)
{
	struct socket *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	lnet_magicversion_t *hmv;
	int rc;
	int i;

	/* the magic/version struct overlays the hdr up to src_nid */
	CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid));

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (!hdr) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	hmv = (lnet_magicversion_t *)&hdr->dest_nid;

	/*
	 * Re-organize V2.x message header to V1.x (lnet_hdr_t)
	 * header and send out
	 */
	hmv->magic         = cpu_to_le32(LNET_PROTO_TCP_MAGIC);
	hmv->version_major = cpu_to_le16(KSOCK_PROTO_V1_MAJOR);
	hmv->version_minor = cpu_to_le16(KSOCK_PROTO_V1_MINOR);

	if (the_lnet.ln_testprotocompat) {
		/* single-shot proto check: deliberately corrupt the
		 * handshake once to exercise peer incompat handling */
		LNET_LOCK();
		if (the_lnet.ln_testprotocompat & 1) {
			hmv->version_major++;   /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		if (the_lnet.ln_testprotocompat & 2) {
			hmv->magic = LNET_PROTO_MAGIC;
			the_lnet.ln_testprotocompat &= ~2;
		}
		LNET_UNLOCK();
	}

	hdr->src_nid = cpu_to_le64(hello->kshm_src_nid);
	hdr->src_pid = cpu_to_le32(hello->kshm_src_pid);
	hdr->type = cpu_to_le32(LNET_MSG_HELLO);
	hdr->payload_length = cpu_to_le32(hello->kshm_nips * sizeof(__u32));
	hdr->msg.hello.type = cpu_to_le32(hello->kshm_ctype);
	hdr->msg.hello.incarnation = cpu_to_le64(hello->kshm_src_incarnation);

	rc = lnet_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout());
	if (rc) {
		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
		goto out;
	}

	if (!hello->kshm_nips)
		goto out;

	/* IP list goes over the wire little-endian too */
	for (i = 0; i < (int)hello->kshm_nips; i++)
		hello->kshm_ips[i] = __cpu_to_le32(hello->kshm_ips[i]);

	rc = lnet_sock_write(sock, hello->kshm_ips,
			     hello->kshm_nips * sizeof(__u32),
			     lnet_acceptor_timeout());
	if (rc) {
		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
			rc, hello->kshm_nips,
			&conn->ksnc_ipaddr, conn->ksnc_port);
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
 522
/*
 * Send a HELLO handshake in V2.x+ wire format: the ksock_hello_msg_t
 * is written as-is (host byte order plus LNET_PROTO_MAGIC; the peer
 * detects and byte-flips if needed), followed by the IP list.
 * Returns 0 on success or a negative errno from the socket write.
 */
static int
ksocknal_send_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello)
{
	struct socket *sock = conn->ksnc_sock;
	int rc;

	hello->kshm_magic   = LNET_PROTO_MAGIC;
	hello->kshm_version = conn->ksnc_proto->pro_version;

	if (the_lnet.ln_testprotocompat) {
		/* single-shot proto check: corrupt the version once to
		 * exercise peer incompat handling */
		LNET_LOCK();
		if (the_lnet.ln_testprotocompat & 1) {
			hello->kshm_version++;   /* just different! */
			the_lnet.ln_testprotocompat &= ~1;
		}
		LNET_UNLOCK();
	}

	/* fixed part of the hello, up to (not including) the IP array */
	rc = lnet_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips),
			     lnet_acceptor_timeout());
	if (rc) {
		CNETERR("Error %d sending HELLO hdr to %pI4h/%d\n",
			rc, &conn->ksnc_ipaddr, conn->ksnc_port);
		return rc;
	}

	if (!hello->kshm_nips)
		return 0;

	rc = lnet_sock_write(sock, hello->kshm_ips,
			     hello->kshm_nips * sizeof(__u32),
			     lnet_acceptor_timeout());
	if (rc) {
		CNETERR("Error %d sending HELLO payload (%d) to %pI4h/%d\n",
			rc, hello->kshm_nips,
			&conn->ksnc_ipaddr, conn->ksnc_port);
	}

	return rc;
}
 564
/*
 * Receive the remainder of a V1.x HELLO (caller already consumed the
 * leading magic/version words) and unpack it into @hello.
 * Returns 0 on success, -EPROTO on a malformed handshake, or the
 * negative errno from the socket read.
 */
static int
ksocknal_recv_hello_v1(struct ksock_conn *conn, ksock_hello_msg_t *hello,
		       int timeout)
{
	struct socket *sock = conn->ksnc_sock;
	lnet_hdr_t *hdr;
	int rc;
	int i;

	LIBCFS_ALLOC(hdr, sizeof(*hdr));
	if (!hdr) {
		CERROR("Can't allocate lnet_hdr_t\n");
		return -ENOMEM;
	}

	/* read from src_nid onwards: dest_nid area held the magic/version
	 * words already consumed by the caller */
	rc = lnet_sock_read(sock, &hdr->src_nid,
			    sizeof(*hdr) - offsetof(lnet_hdr_t, src_nid),
			    timeout);
	if (rc) {
		CERROR("Error %d reading rest of HELLO hdr from %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	/* ...and check we got what we expected */
	if (hdr->type != cpu_to_le32(LNET_MSG_HELLO)) {
		CERROR("Expecting a HELLO hdr, but got type %d from %pI4h\n",
		       le32_to_cpu(hdr->type),
		       &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	/* unpack the little-endian V1.x hdr into the generic hello */
	hello->kshm_src_nid         = le64_to_cpu(hdr->src_nid);
	hello->kshm_src_pid         = le32_to_cpu(hdr->src_pid);
	hello->kshm_src_incarnation = le64_to_cpu(hdr->msg.hello.incarnation);
	hello->kshm_ctype           = le32_to_cpu(hdr->msg.hello.type);
	hello->kshm_nips            = le32_to_cpu(hdr->payload_length) /
						  sizeof(__u32);

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		rc = -EPROTO;
		goto out;
	}

	if (!hello->kshm_nips)
		goto out;

	rc = lnet_sock_read(sock, hello->kshm_ips,
			    hello->kshm_nips * sizeof(__u32), timeout);
	if (rc) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		goto out;
	}

	for (i = 0; i < (int)hello->kshm_nips; i++) {
		hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]);

		/* a zero IP is never valid in the interface list */
		if (!hello->kshm_ips[i]) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			rc = -EPROTO;
			break;
		}
	}
out:
	LIBCFS_FREE(hdr, sizeof(*hdr));

	return rc;
}
 640
/*
 * Receive the remainder of a V2.x+ HELLO (caller already consumed the
 * magic; @hello->kshm_magic tells us whether the peer's byte order
 * matches ours) and unpack it into @hello, byte-swapping if needed.
 * Returns 0 on success, -EPROTO on a malformed handshake, or the
 * negative errno from the socket read.
 */
static int
ksocknal_recv_hello_v2(struct ksock_conn *conn, ksock_hello_msg_t *hello, int timeout)
{
	struct socket *sock = conn->ksnc_sock;
	int rc;
	int i;

	/* remember byte order for all subsequent traffic on this conn */
	if (hello->kshm_magic == LNET_PROTO_MAGIC)
		conn->ksnc_flip = 0;
	else
		conn->ksnc_flip = 1;

	/* fixed part from src_nid up to (not including) the IP array */
	rc = lnet_sock_read(sock, &hello->kshm_src_nid,
			    offsetof(ksock_hello_msg_t, kshm_ips) -
				     offsetof(ksock_hello_msg_t, kshm_src_nid),
			    timeout);
	if (rc) {
		CERROR("Error %d reading HELLO from %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	if (conn->ksnc_flip) {
		__swab32s(&hello->kshm_src_pid);
		__swab64s(&hello->kshm_src_nid);
		__swab32s(&hello->kshm_dst_pid);
		__swab64s(&hello->kshm_dst_nid);
		__swab64s(&hello->kshm_src_incarnation);
		__swab64s(&hello->kshm_dst_incarnation);
		__swab32s(&hello->kshm_ctype);
		__swab32s(&hello->kshm_nips);
	}

	if (hello->kshm_nips > LNET_MAX_INTERFACES) {
		CERROR("Bad nips %d from ip %pI4h\n",
		       hello->kshm_nips, &conn->ksnc_ipaddr);
		return -EPROTO;
	}

	if (!hello->kshm_nips)
		return 0;

	rc = lnet_sock_read(sock, hello->kshm_ips,
			    hello->kshm_nips * sizeof(__u32), timeout);
	if (rc) {
		CERROR("Error %d reading IPs from ip %pI4h\n",
		       rc, &conn->ksnc_ipaddr);
		LASSERT(rc < 0 && rc != -EALREADY);
		return rc;
	}

	for (i = 0; i < (int)hello->kshm_nips; i++) {
		if (conn->ksnc_flip)
			__swab32s(&hello->kshm_ips[i]);

		/* a zero IP is never valid in the interface list */
		if (!hello->kshm_ips[i]) {
			CERROR("Zero IP[%d] from ip %pI4h\n",
			       i, &conn->ksnc_ipaddr);
			return -EPROTO;
		}
	}

	return 0;
}
 706
 707static void
 708ksocknal_pack_msg_v1(struct ksock_tx *tx)
 709{
 710        /* V1.x has no KSOCK_MSG_NOOP */
 711        LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
 712        LASSERT(tx->tx_lnetmsg);
 713
 714        tx->tx_iov[0].iov_base = &tx->tx_lnetmsg->msg_hdr;
 715        tx->tx_iov[0].iov_len  = sizeof(lnet_hdr_t);
 716
 717        tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
 718        tx->tx_resid = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t);
 719}
 720
 721static void
 722ksocknal_pack_msg_v2(struct ksock_tx *tx)
 723{
 724        tx->tx_iov[0].iov_base = &tx->tx_msg;
 725
 726        if (tx->tx_lnetmsg) {
 727                LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP);
 728
 729                tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr;
 730                tx->tx_iov[0].iov_len = sizeof(ksock_msg_t);
 731                tx->tx_nob = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
 732                tx->tx_resid = sizeof(ksock_msg_t) + tx->tx_lnetmsg->msg_len;
 733        } else {
 734                LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP);
 735
 736                tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr);
 737                tx->tx_nob = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_hdr);
 738                tx->tx_resid = offsetof(ksock_msg_t,  ksm_u.lnetmsg.ksnm_hdr);
 739        }
 740        /* Don't checksum before start sending, because packet can be piggybacked with ACK */
 741}
 742
static void
ksocknal_unpack_msg_v1(ksock_msg_t *msg)
{
	/*
	 * V1.x messages arrive as a bare lnet_hdr_t; synthesize the
	 * V2.x ksock_msg fields the rest of the code expects: no
	 * checksum, always an LNet payload, no ZC cookies.
	 */
	msg->ksm_csum = 0;
	msg->ksm_type = KSOCK_MSG_LNET;
	msg->ksm_zc_cookies[0] = 0;
	msg->ksm_zc_cookies[1] = 0;
}
 751
 752static void
 753ksocknal_unpack_msg_v2(ksock_msg_t *msg)
 754{
 755        return;  /* Do nothing */
 756}
 757
/* V1.x: bare lnet_hdr_t on the wire; no NOOPs, no zero-copy support */
struct ksock_proto ksocknal_protocol_v1x = {
	.pro_version        = KSOCK_PROTO_V1,
	.pro_send_hello     = ksocknal_send_hello_v1,
	.pro_recv_hello     = ksocknal_recv_hello_v1,
	.pro_pack           = ksocknal_pack_msg_v1,
	.pro_unpack         = ksocknal_unpack_msg_v1,
	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v1,
	.pro_handle_zcreq   = NULL,		/* no zero-copy in V1.x */
	.pro_handle_zcack   = NULL,
	.pro_queue_tx_zcack = NULL,
	.pro_match_tx       = ksocknal_match_tx
};
 770
/* V2.x: ksock_msg framing with zero-copy request/ack piggybacking */
struct ksock_proto ksocknal_protocol_v2x = {
	.pro_version        = KSOCK_PROTO_V2,
	.pro_send_hello     = ksocknal_send_hello_v2,
	.pro_recv_hello     = ksocknal_recv_hello_v2,
	.pro_pack           = ksocknal_pack_msg_v2,
	.pro_unpack         = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v2,
	.pro_handle_zcreq   = ksocknal_handle_zcreq,
	.pro_handle_zcack   = ksocknal_handle_zcack,
	.pro_match_tx       = ksocknal_match_tx
};
 783
/*
 * V3.x: V2.x wire format plus a dedicated ACK connection type with
 * cookie-range ZC-ACKs and keepalive PINGs (see *_v3 handlers).
 */
struct ksock_proto ksocknal_protocol_v3x = {
	.pro_version        = KSOCK_PROTO_V3,
	.pro_send_hello     = ksocknal_send_hello_v2,
	.pro_recv_hello     = ksocknal_recv_hello_v2,
	.pro_pack           = ksocknal_pack_msg_v2,
	.pro_unpack         = ksocknal_unpack_msg_v2,
	.pro_queue_tx_msg   = ksocknal_queue_tx_msg_v2,
	.pro_queue_tx_zcack = ksocknal_queue_tx_zcack_v3,
	.pro_handle_zcreq   = ksocknal_handle_zcreq,
	.pro_handle_zcack   = ksocknal_handle_zcack,
	.pro_match_tx       = ksocknal_match_tx_v3
};
 796