linux/drivers/infiniband/hw/cxgb4/cm.c
   1/*
   2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
   3 *
   4 * This software is available to you under a choice of one of two
   5 * licenses.  You may choose to be licensed under the terms of the GNU
   6 * General Public License (GPL) Version 2, available from the file
   7 * COPYING in the main directory of this source tree, or the
   8 * OpenIB.org BSD license below:
   9 *
  10 *     Redistribution and use in source and binary forms, with or
  11 *     without modification, are permitted provided that the following
  12 *     conditions are met:
  13 *
  14 *      - Redistributions of source code must retain the above
  15 *        copyright notice, this list of conditions and the following
  16 *        disclaimer.
  17 *
  18 *      - Redistributions in binary form must reproduce the above
  19 *        copyright notice, this list of conditions and the following
  20 *        disclaimer in the documentation and/or other materials
  21 *        provided with the distribution.
  22 *
  23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30 * SOFTWARE.
  31 */
  32#include <linux/module.h>
  33#include <linux/list.h>
  34#include <linux/workqueue.h>
  35#include <linux/skbuff.h>
  36#include <linux/timer.h>
  37#include <linux/notifier.h>
  38#include <linux/inetdevice.h>
  39#include <linux/ip.h>
  40#include <linux/tcp.h>
  41#include <linux/if_vlan.h>
  42
  43#include <net/neighbour.h>
  44#include <net/netevent.h>
  45#include <net/route.h>
  46#include <net/tcp.h>
  47#include <net/ip6_route.h>
  48#include <net/addrconf.h>
  49
  50#include <rdma/ib_addr.h>
  51
  52#include <libcxgb_cm.h>
  53#include "iw_cxgb4.h"
  54#include "clip_tbl.h"
  55
  56static char *states[] = {
  57        "idle",
  58        "listen",
  59        "connecting",
  60        "mpa_wait_req",
  61        "mpa_req_sent",
  62        "mpa_req_rcvd",
  63        "mpa_rep_sent",
  64        "fpdu_mode",
  65        "aborting",
  66        "closing",
  67        "moribund",
  68        "dead",
  69        NULL,
  70};
  71
  72static int nocong;
  73module_param(nocong, int, 0644);
   74MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
  75
  76static int enable_ecn;
  77module_param(enable_ecn, int, 0644);
  78MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
  79
  80static int dack_mode;
  81module_param(dack_mode, int, 0644);
  82MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
  83
  84uint c4iw_max_read_depth = 32;
  85module_param(c4iw_max_read_depth, int, 0644);
  86MODULE_PARM_DESC(c4iw_max_read_depth,
  87                 "Per-connection max ORD/IRD (default=32)");
  88
  89static int enable_tcp_timestamps;
  90module_param(enable_tcp_timestamps, int, 0644);
  91MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
  92
  93static int enable_tcp_sack;
  94module_param(enable_tcp_sack, int, 0644);
  95MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
  96
  97static int enable_tcp_window_scaling = 1;
  98module_param(enable_tcp_window_scaling, int, 0644);
  99MODULE_PARM_DESC(enable_tcp_window_scaling,
 100                 "Enable tcp window scaling (default=1)");
 101
 102static int peer2peer = 1;
 103module_param(peer2peer, int, 0644);
 104MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
 105
 106static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 107module_param(p2p_type, int, 0644);
 108MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
 109                           "1=RDMA_READ 0=RDMA_WRITE (default 1)");
 110
 111static int ep_timeout_secs = 60;
 112module_param(ep_timeout_secs, int, 0644);
 113MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
 114                                   "in seconds (default=60)");
 115
 116static int mpa_rev = 2;
 117module_param(mpa_rev, int, 0644);
 118MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
 119                "1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
 120                " compliant (default=2)");
 121
 122static int markers_enabled;
 123module_param(markers_enabled, int, 0644);
 124MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
 125
 126static int crc_enabled = 1;
 127module_param(crc_enabled, int, 0644);
 128MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
 129
 130static int rcv_win = 256 * 1024;
 131module_param(rcv_win, int, 0644);
 132MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
 133
 134static int snd_win = 128 * 1024;
 135module_param(snd_win, int, 0644);
 136MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
 137
 138static struct workqueue_struct *workq;
 139
 140static struct sk_buff_head rxq;
 141
 142static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
 143static void ep_timeout(struct timer_list *t);
 144static void connect_reply_upcall(struct c4iw_ep *ep, int status);
 145static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
 146
 147static LIST_HEAD(timeout_list);
 148static DEFINE_SPINLOCK(timeout_lock);
 149
 150static void deref_cm_id(struct c4iw_ep_common *epc)
 151{
 152        epc->cm_id->rem_ref(epc->cm_id);
 153        epc->cm_id = NULL;
 154        set_bit(CM_ID_DEREFED, &epc->history);
 155}
 156
 157static void ref_cm_id(struct c4iw_ep_common *epc)
 158{
 159        set_bit(CM_ID_REFED, &epc->history);
 160        epc->cm_id->add_ref(epc->cm_id);
 161}
 162
 163static void deref_qp(struct c4iw_ep *ep)
 164{
 165        c4iw_qp_rem_ref(&ep->com.qp->ibqp);
 166        clear_bit(QP_REFERENCED, &ep->com.flags);
 167        set_bit(QP_DEREFED, &ep->com.history);
 168}
 169
 170static void ref_qp(struct c4iw_ep *ep)
 171{
 172        set_bit(QP_REFERENCED, &ep->com.flags);
 173        set_bit(QP_REFED, &ep->com.history);
 174        c4iw_qp_add_ref(&ep->com.qp->ibqp);
 175}
 176
 177static void start_ep_timer(struct c4iw_ep *ep)
 178{
 179        pr_debug("ep %p\n", ep);
 180        if (timer_pending(&ep->timer)) {
 181                pr_err("%s timer already started! ep %p\n",
 182                       __func__, ep);
 183                return;
 184        }
 185        clear_bit(TIMEOUT, &ep->com.flags);
 186        c4iw_get_ep(&ep->com);
 187        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 188        add_timer(&ep->timer);
 189}
 190
 191static int stop_ep_timer(struct c4iw_ep *ep)
 192{
 193        pr_debug("ep %p stopping\n", ep);
 194        del_timer_sync(&ep->timer);
 195        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
 196                c4iw_put_ep(&ep->com);
 197                return 0;
 198        }
 199        return 1;
 200}
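
/*
 * How the two helpers above pair up: start_ep_timer() takes an ep
 * reference and arms a one-shot timer for ep_timeout_secs seconds;
 * stop_ep_timer() cancels it synchronously and drops that reference only
 * if ep_timeout() has not already set the TIMEOUT flag.  A return of 1
 * means the timeout handler fired first and still owns the reference.
 * Illustrative caller sketch (not taken from this file):
 *
 *	start_ep_timer(ep);
 *	... drive the MPA exchange ...
 *	if (stop_ep_timer(ep))
 *		return;		(timeout path owns the final put)
 */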
 201
 202static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
 203                  struct l2t_entry *l2e)
 204{
 205        int     error = 0;
 206
 207        if (c4iw_fatal_error(rdev)) {
 208                kfree_skb(skb);
 209                pr_err("%s - device in error state - dropping\n", __func__);
 210                return -EIO;
 211        }
 212        error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
 213        if (error < 0)
 214                kfree_skb(skb);
 215        else if (error == NET_XMIT_DROP)
 216                return -ENOMEM;
 217        return error < 0 ? error : 0;
 218}
 219
 220int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
 221{
 222        int     error = 0;
 223
 224        if (c4iw_fatal_error(rdev)) {
 225                kfree_skb(skb);
 226                pr_err("%s - device in error state - dropping\n", __func__);
 227                return -EIO;
 228        }
 229        error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
 230        if (error < 0)
 231                kfree_skb(skb);
 232        return error < 0 ? error : 0;
 233}
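
/*
 * Both send helpers above return 0 on success or a negative errno, and in
 * every case the skb is consumed: it is freed here when the device is in a
 * fatal state or the lower-level send returns a negative error, and handed
 * down otherwise.  c4iw_l2t_send() additionally maps NET_XMIT_DROP to
 * -ENOMEM.  Illustrative caller pattern (hypothetical):
 *
 *	ret = c4iw_ofld_send(&ep->com.dev->rdev, skb);
 *	if (ret)
 *		goto err;	(do not kfree_skb() again here)
 */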
 234
 235static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 236{
 237        u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 238
 239        skb = get_skb(skb, len, GFP_KERNEL);
 240        if (!skb)
 241                return;
 242
 243        cxgb_mk_tid_release(skb, len, hwtid, 0);
 244        c4iw_ofld_send(rdev, skb);
 245        return;
 246}
 247
 248static void set_emss(struct c4iw_ep *ep, u16 opt)
 249{
 250        ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
 251                   ((AF_INET == ep->com.remote_addr.ss_family) ?
 252                    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
 253                   sizeof(struct tcphdr);
 254        ep->mss = ep->emss;
 255        if (TCPOPT_TSTAMP_G(opt))
 256                ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
 257        if (ep->emss < 128)
 258                ep->emss = 128;
 259        if (ep->emss & 7)
 260                pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
 261                         TCPOPT_MSS_G(opt), ep->mss, ep->emss);
 262        pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
 263                 ep->emss);
 264}
 265
 266static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
 267{
 268        enum c4iw_ep_state state;
 269
 270        mutex_lock(&epc->mutex);
 271        state = epc->state;
 272        mutex_unlock(&epc->mutex);
 273        return state;
 274}
 275
 276static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 277{
 278        epc->state = new;
 279}
 280
 281static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 282{
 283        mutex_lock(&epc->mutex);
 284        pr_debug("%s -> %s\n", states[epc->state], states[new]);
 285        __state_set(epc, new);
 286        mutex_unlock(&epc->mutex);
 287        return;
 288}
 289
 290static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
 291{
 292        struct sk_buff *skb;
 293        unsigned int i;
 294        size_t len;
 295
 296        len = roundup(sizeof(union cpl_wr_size), 16);
 297        for (i = 0; i < size; i++) {
 298                skb = alloc_skb(len, GFP_KERNEL);
 299                if (!skb)
 300                        goto fail;
 301                skb_queue_tail(ep_skb_list, skb);
 302        }
 303        return 0;
 304fail:
 305        skb_queue_purge(ep_skb_list);
 306        return -ENOMEM;
 307}
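
/*
 * The skbs pre-allocated here are parked on ep->com.ep_skb_list and later
 * dequeued by send_flowc(), send_halfclose() and send_abort_req() below,
 * so connection-teardown messages do not depend on an allocation
 * succeeding under memory pressure.  On partial failure the whole list is
 * purged and -ENOMEM is returned.  Hypothetical setup-time sketch (the
 * count is illustrative, not the driver's actual constant):
 *
 *	if (alloc_ep_skb_list(&ep->com.ep_skb_list, 2))
 *		goto fail;
 */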
 308
 309static void *alloc_ep(int size, gfp_t gfp)
 310{
 311        struct c4iw_ep_common *epc;
 312
 313        epc = kzalloc(size, gfp);
 314        if (epc) {
 315                epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
 316                if (!epc->wr_waitp) {
 317                        kfree(epc);
 318                        epc = NULL;
 319                        goto out;
 320                }
 321                kref_init(&epc->kref);
 322                mutex_init(&epc->mutex);
 323                c4iw_init_wr_wait(epc->wr_waitp);
 324        }
 325        pr_debug("alloc ep %p\n", epc);
 326out:
 327        return epc;
 328}
 329
 330static void remove_ep_tid(struct c4iw_ep *ep)
 331{
 332        unsigned long flags;
 333
 334        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 335        __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
 336        if (xa_empty(&ep->com.dev->hwtids))
 337                wake_up(&ep->com.dev->wait);
 338        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 339}
 340
 341static int insert_ep_tid(struct c4iw_ep *ep)
 342{
 343        unsigned long flags;
 344        int err;
 345
 346        xa_lock_irqsave(&ep->com.dev->hwtids, flags);
 347        err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
 348        xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
 349
 350        return err;
 351}
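
/*
 * The hwtids xarray maps a hardware tid back to its ep under an irq-safe
 * lock, so CPL handlers can do the lookup from any context.  When the last
 * tid is erased, remove_ep_tid() wakes dev->wait, which appears to be what
 * device teardown waits on before the c4iw_dev can go away.
 */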
 352
 353/*
 354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
 355 */
 356static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
 357{
 358        struct c4iw_ep *ep;
 359        unsigned long flags;
 360
 361        xa_lock_irqsave(&dev->hwtids, flags);
 362        ep = xa_load(&dev->hwtids, tid);
 363        if (ep)
 364                c4iw_get_ep(&ep->com);
 365        xa_unlock_irqrestore(&dev->hwtids, flags);
 366        return ep;
 367}
 368
 369/*
 370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
 371 */
 372static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
 373                                               unsigned int stid)
 374{
 375        struct c4iw_listen_ep *ep;
 376        unsigned long flags;
 377
 378        xa_lock_irqsave(&dev->stids, flags);
 379        ep = xa_load(&dev->stids, stid);
 380        if (ep)
 381                c4iw_get_ep(&ep->com);
 382        xa_unlock_irqrestore(&dev->stids, flags);
 383        return ep;
 384}
 385
 386void _c4iw_free_ep(struct kref *kref)
 387{
 388        struct c4iw_ep *ep;
 389
 390        ep = container_of(kref, struct c4iw_ep, com.kref);
 391        pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
 392        if (test_bit(QP_REFERENCED, &ep->com.flags))
 393                deref_qp(ep);
 394        if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
 395                if (ep->com.remote_addr.ss_family == AF_INET6) {
 396                        struct sockaddr_in6 *sin6 =
 397                                        (struct sockaddr_in6 *)
 398                                        &ep->com.local_addr;
 399
 400                        cxgb4_clip_release(
 401                                        ep->com.dev->rdev.lldi.ports[0],
 402                                        (const u32 *)&sin6->sin6_addr.s6_addr,
 403                                        1);
 404                }
 405                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
 406                                 ep->com.local_addr.ss_family);
 407                dst_release(ep->dst);
 408                cxgb4_l2t_release(ep->l2t);
 409                kfree_skb(ep->mpa_skb);
 410        }
 411        if (!skb_queue_empty(&ep->com.ep_skb_list))
 412                skb_queue_purge(&ep->com.ep_skb_list);
 413        c4iw_put_wr_wait(ep->com.wr_waitp);
 414        kfree(ep);
 415}
 416
 417static void release_ep_resources(struct c4iw_ep *ep)
 418{
 419        set_bit(RELEASE_RESOURCES, &ep->com.flags);
 420
 421        /*
  422         * If we have a hwtid, then remove it from the hwtid xarray
 423         * so lookups will no longer find this endpoint.  Otherwise
 424         * we have a race where one thread finds the ep ptr just
 425         * before the other thread is freeing the ep memory.
 426         */
 427        if (ep->hwtid != -1)
 428                remove_ep_tid(ep);
 429        c4iw_put_ep(&ep->com);
 430}
 431
 432static int status2errno(int status)
 433{
 434        switch (status) {
 435        case CPL_ERR_NONE:
 436                return 0;
 437        case CPL_ERR_CONN_RESET:
 438                return -ECONNRESET;
 439        case CPL_ERR_ARP_MISS:
 440                return -EHOSTUNREACH;
 441        case CPL_ERR_CONN_TIMEDOUT:
 442                return -ETIMEDOUT;
 443        case CPL_ERR_TCAM_FULL:
 444                return -ENOMEM;
 445        case CPL_ERR_CONN_EXIST:
 446                return -EADDRINUSE;
 447        default:
 448                return -EIO;
 449        }
 450}
 451
 452/*
 453 * Try and reuse skbs already allocated...
 454 */
 455static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
 456{
 457        if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
 458                skb_trim(skb, 0);
 459                skb_get(skb);
 460                skb_reset_transport_header(skb);
 461        } else {
 462                skb = alloc_skb(len, gfp);
 463                if (!skb)
 464                        return NULL;
 465        }
 466        t4_set_arp_err_handler(skb, NULL, NULL);
 467        return skb;
 468}
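
/*
 * get_skb() above reuses the passed-in skb only when it is linear and not
 * cloned: it trims it to zero length, takes an extra reference and resets
 * the transport header; otherwise a fresh skb of the requested length is
 * allocated.  Either way any previously installed ARP error handler is
 * cleared, so callers install their own handler afterwards if they need
 * one.
 */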
 469
 470static struct net_device *get_real_dev(struct net_device *egress_dev)
 471{
 472        return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 473}
 474
 475static void arp_failure_discard(void *handle, struct sk_buff *skb)
 476{
 477        pr_err("ARP failure\n");
 478        kfree_skb(skb);
 479}
 480
 481static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
 482{
 483        pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
 484}
 485
 486enum {
 487        NUM_FAKE_CPLS = 2,
 488        FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
 489        FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
 490};
 491
 492static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 493{
 494        struct c4iw_ep *ep;
 495
 496        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 497        release_ep_resources(ep);
 498        return 0;
 499}
 500
 501static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
 502{
 503        struct c4iw_ep *ep;
 504
 505        ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
 506        c4iw_put_ep(&ep->parent_ep->com);
 507        release_ep_resources(ep);
 508        return 0;
 509}
 510
 511/*
 512 * Fake up a special CPL opcode and call sched() so process_work() will call
 513 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 514 * because ARP error handlers are called in an ATOMIC context, and
 515 * _c4iw_free_ep() needs to block.
 516 */
 517static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
 518                                  int cpl)
 519{
 520        struct cpl_act_establish *rpl = cplhdr(skb);
 521
 522        /* Set our special ARP_FAILURE opcode */
 523        rpl->ot.opcode = cpl;
 524
 525        /*
 526         * Save ep in the skb->cb area, after where sched() will save the dev
 527         * ptr.
 528         */
 529        *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
 530        sched(ep->com.dev, skb);
 531}
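
/*
 * Layout assumed in skb->cb by this fake-CPL path: sched() saves the dev
 * pointer at the front of skb->cb (per the comment above), so the ep
 * pointer is stashed 2 * sizeof(void *) bytes in, which is where
 * _put_ep_safe() and _put_pass_ep_safe() read it back once process_work()
 * runs in a context that may block.
 */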
 532
 533/* Handle an ARP failure for an accept */
 534static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
 535{
 536        struct c4iw_ep *ep = handle;
 537
 538        pr_err("ARP failure during accept - tid %u - dropping connection\n",
 539               ep->hwtid);
 540
 541        __state_set(&ep->com, DEAD);
 542        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
 543}
 544
 545/*
 546 * Handle an ARP failure for an active open.
 547 */
 548static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
 549{
 550        struct c4iw_ep *ep = handle;
 551
 552        pr_err("ARP failure during connect\n");
 553        connect_reply_upcall(ep, -EHOSTUNREACH);
 554        __state_set(&ep->com, DEAD);
 555        if (ep->com.remote_addr.ss_family == AF_INET6) {
 556                struct sockaddr_in6 *sin6 =
 557                        (struct sockaddr_in6 *)&ep->com.local_addr;
 558                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 559                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 560        }
 561        xa_erase_irq(&ep->com.dev->atids, ep->atid);
 562        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
 563        queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 564}
 565
 566/*
 567 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
 568 * and send it along.
 569 */
 570static void abort_arp_failure(void *handle, struct sk_buff *skb)
 571{
 572        int ret;
 573        struct c4iw_ep *ep = handle;
 574        struct c4iw_rdev *rdev = &ep->com.dev->rdev;
 575        struct cpl_abort_req *req = cplhdr(skb);
 576
 577        pr_debug("rdev %p\n", rdev);
 578        req->cmd = CPL_ABORT_NO_RST;
 579        skb_get(skb);
 580        ret = c4iw_ofld_send(rdev, skb);
 581        if (ret) {
 582                __state_set(&ep->com, DEAD);
 583                queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
 584        } else
 585                kfree_skb(skb);
 586}
 587
 588static int send_flowc(struct c4iw_ep *ep)
 589{
 590        struct fw_flowc_wr *flowc;
 591        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 592        u16 vlan = ep->l2t->vlan;
 593        int nparams;
 594        int flowclen, flowclen16;
 595
 596        if (WARN_ON(!skb))
 597                return -ENOMEM;
 598
 599        if (vlan == CPL_L2T_VLAN_NONE)
 600                nparams = 9;
 601        else
 602                nparams = 10;
 603
 604        flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
 605        flowclen16 = DIV_ROUND_UP(flowclen, 16);
 606        flowclen = flowclen16 * 16;
 607
 608        flowc = __skb_put(skb, flowclen);
 609        memset(flowc, 0, flowclen);
 610
 611        flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 612                                           FW_FLOWC_WR_NPARAMS_V(nparams));
 613        flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
 614                                          FW_WR_FLOWID_V(ep->hwtid));
 615
 616        flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 617        flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
 618                                            (ep->com.dev->rdev.lldi.pf));
 619        flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 620        flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
 621        flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 622        flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
 623        flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 624        flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
 625        flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
 626        flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
 627        flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 628        flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
 629        flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 630        flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 631        flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 632        flowc->mnemval[7].val = cpu_to_be32(ep->emss);
 633        flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
 634        flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
 635        if (nparams == 10) {
 636                u16 pri;
 637                pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 638                flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
 639                flowc->mnemval[9].val = cpu_to_be32(pri);
 640        }
 641
 642        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 643        return c4iw_ofld_send(&ep->com.dev->rdev, skb);
 644}
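
/*
 * The FLOWC work request built above seeds the firmware's per-flow state
 * before any offloaded TX: PF/VF, TX channel, port, ingress (RSS) queue,
 * initial send/receive sequence numbers, send buffer, MSS and the receive
 * window scale.  When the L2T entry carries a VLAN tag, a tenth mnemonic
 * passes the VLAN priority as the scheduling class.
 */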
 645
 646static int send_halfclose(struct c4iw_ep *ep)
 647{
 648        struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
 649        u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 650
 651        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 652        if (WARN_ON(!skb))
 653                return -ENOMEM;
 654
 655        cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
 656                              NULL, arp_failure_discard);
 657
 658        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 659}
 660
 661static void read_tcb(struct c4iw_ep *ep)
 662{
 663        struct sk_buff *skb;
 664        struct cpl_get_tcb *req;
 665        int wrlen = roundup(sizeof(*req), 16);
 666
 667        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
 668        if (WARN_ON(!skb))
 669                return;
 670
 671        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
 672        req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
 673        memset(req, 0, wrlen);
 674        INIT_TP_WR(req, ep->hwtid);
 675        OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
 676        req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
 677
 678        /*
 679         * keep a ref on the ep so the tcb is not unlocked before this
 680         * cpl completes. The ref is released in read_tcb_rpl().
 681         */
 682        c4iw_get_ep(&ep->com);
 683        if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
 684                c4iw_put_ep(&ep->com);
 685}
 686
 687static int send_abort_req(struct c4iw_ep *ep)
 688{
 689        u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 690        struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 691
 692        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 693        if (WARN_ON(!req_skb))
 694                return -ENOMEM;
 695
 696        cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
 697                          ep, abort_arp_failure);
 698
 699        return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 700}
 701
 702static int send_abort(struct c4iw_ep *ep)
 703{
 704        if (!ep->com.qp || !ep->com.qp->srq) {
 705                send_abort_req(ep);
 706                return 0;
 707        }
 708        set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
 709        read_tcb(ep);
 710        return 0;
 711}
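
/*
 * send_abort() defers the actual CPL_ABORT_REQ when the QP is backed by an
 * SRQ: it marks ABORT_REQ_IN_PROGRESS and issues a TCB read instead, the
 * abort presumably being completed from the CPL_GET_TCB reply path once
 * the TCB state has been captured.  Without an SRQ the abort request is
 * sent immediately.
 */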
 712
 713static int send_connect(struct c4iw_ep *ep)
 714{
 715        struct cpl_act_open_req *req = NULL;
 716        struct cpl_t5_act_open_req *t5req = NULL;
 717        struct cpl_t6_act_open_req *t6req = NULL;
 718        struct cpl_act_open_req6 *req6 = NULL;
 719        struct cpl_t5_act_open_req6 *t5req6 = NULL;
 720        struct cpl_t6_act_open_req6 *t6req6 = NULL;
 721        struct sk_buff *skb;
 722        u64 opt0;
 723        u32 opt2;
 724        unsigned int mtu_idx;
 725        u32 wscale;
 726        int win, sizev4, sizev6, wrlen;
 727        struct sockaddr_in *la = (struct sockaddr_in *)
 728                                 &ep->com.local_addr;
 729        struct sockaddr_in *ra = (struct sockaddr_in *)
 730                                 &ep->com.remote_addr;
 731        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
 732                                   &ep->com.local_addr;
 733        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
 734                                   &ep->com.remote_addr;
 735        int ret;
 736        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 737        u32 isn = (prandom_u32() & ~7UL) - 1;
 738        struct net_device *netdev;
 739        u64 params;
 740
 741        netdev = ep->com.dev->rdev.lldi.ports[0];
 742
 743        switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 744        case CHELSIO_T4:
 745                sizev4 = sizeof(struct cpl_act_open_req);
 746                sizev6 = sizeof(struct cpl_act_open_req6);
 747                break;
 748        case CHELSIO_T5:
 749                sizev4 = sizeof(struct cpl_t5_act_open_req);
 750                sizev6 = sizeof(struct cpl_t5_act_open_req6);
 751                break;
 752        case CHELSIO_T6:
 753                sizev4 = sizeof(struct cpl_t6_act_open_req);
 754                sizev6 = sizeof(struct cpl_t6_act_open_req6);
 755                break;
 756        default:
 757                pr_err("T%d Chip is not supported\n",
 758                       CHELSIO_CHIP_VERSION(adapter_type));
 759                return -EINVAL;
 760        }
 761
 762        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
 763                        roundup(sizev4, 16) :
 764                        roundup(sizev6, 16);
 765
 766        pr_debug("ep %p atid %u\n", ep, ep->atid);
 767
 768        skb = get_skb(NULL, wrlen, GFP_KERNEL);
 769        if (!skb) {
 770                pr_err("%s - failed to alloc skb\n", __func__);
 771                return -ENOMEM;
 772        }
 773        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 774
 775        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
 776                      enable_tcp_timestamps,
 777                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
 778        wscale = cxgb_compute_wscale(rcv_win);
 779
 780        /*
 781         * Specify the largest window that will fit in opt0. The
 782         * remainder will be specified in the rx_data_ack.
 783         */
 784        win = ep->rcv_win >> 10;
 785        if (win > RCV_BUFSIZ_M)
 786                win = RCV_BUFSIZ_M;
 787
 788        opt0 = (nocong ? NO_CONG_F : 0) |
 789               KEEP_ALIVE_F |
 790               DELACK_F |
 791               WND_SCALE_V(wscale) |
 792               MSS_IDX_V(mtu_idx) |
 793               L2T_IDX_V(ep->l2t->idx) |
 794               TX_CHAN_V(ep->tx_chan) |
 795               SMAC_SEL_V(ep->smac_idx) |
 796               DSCP_V(ep->tos >> 2) |
 797               ULP_MODE_V(ULP_MODE_TCPDDP) |
 798               RCV_BUFSIZ_V(win);
 799        opt2 = RX_CHANNEL_V(0) |
 800               CCTRL_ECN_V(enable_ecn) |
 801               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
 802        if (enable_tcp_timestamps)
 803                opt2 |= TSTAMPS_EN_F;
 804        if (enable_tcp_sack)
 805                opt2 |= SACK_EN_F;
 806        if (wscale && enable_tcp_window_scaling)
 807                opt2 |= WND_SCALE_EN_F;
 808        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
 809                if (peer2peer)
 810                        isn += 4;
 811
 812                opt2 |= T5_OPT_2_VALID_F;
 813                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
 814                opt2 |= T5_ISS_F;
 815        }
 816
 817        params = cxgb4_select_ntuple(netdev, ep->l2t);
 818
 819        if (ep->com.remote_addr.ss_family == AF_INET6)
 820                cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
 821                               (const u32 *)&la6->sin6_addr.s6_addr, 1);
 822
 823        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 824
 825        if (ep->com.remote_addr.ss_family == AF_INET) {
 826                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 827                case CHELSIO_T4:
 828                        req = skb_put(skb, wrlen);
 829                        INIT_TP_WR(req, 0);
 830                        break;
 831                case CHELSIO_T5:
 832                        t5req = skb_put(skb, wrlen);
 833                        INIT_TP_WR(t5req, 0);
 834                        req = (struct cpl_act_open_req *)t5req;
 835                        break;
 836                case CHELSIO_T6:
 837                        t6req = skb_put(skb, wrlen);
 838                        INIT_TP_WR(t6req, 0);
 839                        req = (struct cpl_act_open_req *)t6req;
 840                        t5req = (struct cpl_t5_act_open_req *)t6req;
 841                        break;
 842                default:
 843                        pr_err("T%d Chip is not supported\n",
 844                               CHELSIO_CHIP_VERSION(adapter_type));
 845                        ret = -EINVAL;
 846                        goto clip_release;
 847                }
 848
 849                OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
 850                                        ((ep->rss_qid<<14) | ep->atid)));
 851                req->local_port = la->sin_port;
 852                req->peer_port = ra->sin_port;
 853                req->local_ip = la->sin_addr.s_addr;
 854                req->peer_ip = ra->sin_addr.s_addr;
 855                req->opt0 = cpu_to_be64(opt0);
 856
 857                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 858                        req->params = cpu_to_be32(params);
 859                        req->opt2 = cpu_to_be32(opt2);
 860                } else {
 861                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 862                                t5req->params =
 863                                          cpu_to_be64(FILTER_TUPLE_V(params));
 864                                t5req->rsvd = cpu_to_be32(isn);
 865                                pr_debug("snd_isn %u\n", t5req->rsvd);
 866                                t5req->opt2 = cpu_to_be32(opt2);
 867                        } else {
 868                                t6req->params =
 869                                          cpu_to_be64(FILTER_TUPLE_V(params));
 870                                t6req->rsvd = cpu_to_be32(isn);
 871                                pr_debug("snd_isn %u\n", t6req->rsvd);
 872                                t6req->opt2 = cpu_to_be32(opt2);
 873                        }
 874                }
 875        } else {
 876                switch (CHELSIO_CHIP_VERSION(adapter_type)) {
 877                case CHELSIO_T4:
 878                        req6 = skb_put(skb, wrlen);
 879                        INIT_TP_WR(req6, 0);
 880                        break;
 881                case CHELSIO_T5:
 882                        t5req6 = skb_put(skb, wrlen);
 883                        INIT_TP_WR(t5req6, 0);
 884                        req6 = (struct cpl_act_open_req6 *)t5req6;
 885                        break;
 886                case CHELSIO_T6:
 887                        t6req6 = skb_put(skb, wrlen);
 888                        INIT_TP_WR(t6req6, 0);
 889                        req6 = (struct cpl_act_open_req6 *)t6req6;
 890                        t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
 891                        break;
 892                default:
 893                        pr_err("T%d Chip is not supported\n",
 894                               CHELSIO_CHIP_VERSION(adapter_type));
 895                        ret = -EINVAL;
 896                        goto clip_release;
 897                }
 898
 899                OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
 900                                        ((ep->rss_qid<<14)|ep->atid)));
 901                req6->local_port = la6->sin6_port;
 902                req6->peer_port = ra6->sin6_port;
 903                req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
 904                req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
 905                req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
 906                req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
 907                req6->opt0 = cpu_to_be64(opt0);
 908
 909                if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
 910                        req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
 911                                                                      ep->l2t));
 912                        req6->opt2 = cpu_to_be32(opt2);
 913                } else {
 914                        if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
 915                                t5req6->params =
 916                                            cpu_to_be64(FILTER_TUPLE_V(params));
 917                                t5req6->rsvd = cpu_to_be32(isn);
 918                                pr_debug("snd_isn %u\n", t5req6->rsvd);
 919                                t5req6->opt2 = cpu_to_be32(opt2);
 920                        } else {
 921                                t6req6->params =
 922                                            cpu_to_be64(FILTER_TUPLE_V(params));
 923                                t6req6->rsvd = cpu_to_be32(isn);
 924                                pr_debug("snd_isn %u\n", t6req6->rsvd);
 925                                t6req6->opt2 = cpu_to_be32(opt2);
 926                        }
 927
 928                }
 929        }
 930
 931        set_bit(ACT_OPEN_REQ, &ep->com.history);
 932        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 933clip_release:
 934        if (ret && ep->com.remote_addr.ss_family == AF_INET6)
 935                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 936                                   (const u32 *)&la6->sin6_addr.s6_addr, 1);
 937        return ret;
 938}
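
/*
 * Summary of send_connect() above: it builds the chip-specific active-open
 * CPL (T4/T5/T6, IPv4 or IPv6), encodes the connection options in
 * opt0/opt2 (the receive window in 1KB units capped at RCV_BUFSIZ_M, plus
 * congestion, ECN, timestamp, SACK and window-scaling controls), installs
 * act_open_req_arp_failure() as the ARP error handler, and for IPv6 takes
 * a CLIP reference that is dropped again if the send fails.
 */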
 939
 940static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
 941                        u8 mpa_rev_to_use)
 942{
 943        int mpalen, wrlen, ret;
 944        struct fw_ofld_tx_data_wr *req;
 945        struct mpa_message *mpa;
 946        struct mpa_v2_conn_params mpa_v2_params;
 947
 948        pr_debug("ep %p tid %u pd_len %d\n",
 949                 ep, ep->hwtid, ep->plen);
 950
 951        mpalen = sizeof(*mpa) + ep->plen;
 952        if (mpa_rev_to_use == 2)
 953                mpalen += sizeof(struct mpa_v2_conn_params);
 954        wrlen = roundup(mpalen + sizeof(*req), 16);
 955        skb = get_skb(skb, wrlen, GFP_KERNEL);
 956        if (!skb) {
 957                connect_reply_upcall(ep, -ENOMEM);
 958                return -ENOMEM;
 959        }
 960        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
 961
 962        req = skb_put_zero(skb, wrlen);
 963        req->op_to_immdlen = cpu_to_be32(
 964                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
 965                FW_WR_COMPL_F |
 966                FW_WR_IMMDLEN_V(mpalen));
 967        req->flowid_len16 = cpu_to_be32(
 968                FW_WR_FLOWID_V(ep->hwtid) |
 969                FW_WR_LEN16_V(wrlen >> 4));
 970        req->plen = cpu_to_be32(mpalen);
 971        req->tunnel_to_proxy = cpu_to_be32(
 972                FW_OFLD_TX_DATA_WR_FLUSH_F |
 973                FW_OFLD_TX_DATA_WR_SHOVE_F);
 974
 975        mpa = (struct mpa_message *)(req + 1);
 976        memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 977
 978        mpa->flags = 0;
 979        if (crc_enabled)
 980                mpa->flags |= MPA_CRC;
 981        if (markers_enabled) {
 982                mpa->flags |= MPA_MARKERS;
 983                ep->mpa_attr.recv_marker_enabled = 1;
 984        } else {
 985                ep->mpa_attr.recv_marker_enabled = 0;
 986        }
 987        if (mpa_rev_to_use == 2)
 988                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 989
 990        mpa->private_data_size = htons(ep->plen);
 991        mpa->revision = mpa_rev_to_use;
 992        if (mpa_rev_to_use == 1) {
 993                ep->tried_with_mpa_v1 = 1;
 994                ep->retry_with_mpa_v1 = 0;
 995        }
 996
 997        if (mpa_rev_to_use == 2) {
 998                mpa->private_data_size =
 999                        htons(ntohs(mpa->private_data_size) +
1000                              sizeof(struct mpa_v2_conn_params));
1001                pr_debug("initiator ird %u ord %u\n", ep->ird,
1002                         ep->ord);
1003                mpa_v2_params.ird = htons((u16)ep->ird);
1004                mpa_v2_params.ord = htons((u16)ep->ord);
1005
1006                if (peer2peer) {
1007                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1008                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1009                                mpa_v2_params.ord |=
1010                                        htons(MPA_V2_RDMA_WRITE_RTR);
1011                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1012                                mpa_v2_params.ord |=
1013                                        htons(MPA_V2_RDMA_READ_RTR);
1014                }
1015                memcpy(mpa->private_data, &mpa_v2_params,
1016                       sizeof(struct mpa_v2_conn_params));
1017
1018                if (ep->plen)
1019                        memcpy(mpa->private_data +
1020                               sizeof(struct mpa_v2_conn_params),
1021                               ep->mpa_pkt + sizeof(*mpa), ep->plen);
1022        } else
1023                if (ep->plen)
1024                        memcpy(mpa->private_data,
1025                                        ep->mpa_pkt + sizeof(*mpa), ep->plen);
1026
1027        /*
1028         * Reference the mpa skb.  This ensures the data area
1029         * will remain in memory until the hw acks the tx.
1030         * Function fw4_ack() will deref it.
1031         */
1032        skb_get(skb);
1033        t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1034        ep->mpa_skb = skb;
1035        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1036        if (ret)
1037                return ret;
1038        start_ep_timer(ep);
1039        __state_set(&ep->com, MPA_REQ_SENT);
1040        ep->mpa_attr.initiator = 1;
1041        ep->snd_seq += mpalen;
1042        return ret;
1043}
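
/*
 * MPA v2 framing used by send_mpa_req() (and by the reply/reject paths
 * that follow): private_data_size covers the mpa_v2_conn_params header
 * plus any ULP private data, the v2 params carry the initiator's IRD/ORD,
 * and when peer2peer is enabled the ORD field also advertises the RTR type
 * (RDMA WRITE or RDMA READ) selected via the p2p_type module parameter.
 */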
1044
1045static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1046{
1047        int mpalen, wrlen;
1048        struct fw_ofld_tx_data_wr *req;
1049        struct mpa_message *mpa;
1050        struct sk_buff *skb;
1051        struct mpa_v2_conn_params mpa_v2_params;
1052
1053        pr_debug("ep %p tid %u pd_len %d\n",
1054                 ep, ep->hwtid, ep->plen);
1055
1056        mpalen = sizeof(*mpa) + plen;
1057        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1058                mpalen += sizeof(struct mpa_v2_conn_params);
1059        wrlen = roundup(mpalen + sizeof(*req), 16);
1060
1061        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1062        if (!skb) {
1063                pr_err("%s - cannot alloc skb!\n", __func__);
1064                return -ENOMEM;
1065        }
1066        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1067
1068        req = skb_put_zero(skb, wrlen);
1069        req->op_to_immdlen = cpu_to_be32(
1070                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1071                FW_WR_COMPL_F |
1072                FW_WR_IMMDLEN_V(mpalen));
1073        req->flowid_len16 = cpu_to_be32(
1074                FW_WR_FLOWID_V(ep->hwtid) |
1075                FW_WR_LEN16_V(wrlen >> 4));
1076        req->plen = cpu_to_be32(mpalen);
1077        req->tunnel_to_proxy = cpu_to_be32(
1078                FW_OFLD_TX_DATA_WR_FLUSH_F |
1079                FW_OFLD_TX_DATA_WR_SHOVE_F);
1080
1081        mpa = (struct mpa_message *)(req + 1);
1082        memset(mpa, 0, sizeof(*mpa));
1083        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1084        mpa->flags = MPA_REJECT;
1085        mpa->revision = ep->mpa_attr.version;
1086        mpa->private_data_size = htons(plen);
1087
1088        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1089                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1090                mpa->private_data_size =
1091                        htons(ntohs(mpa->private_data_size) +
1092                              sizeof(struct mpa_v2_conn_params));
1093                mpa_v2_params.ird = htons(((u16)ep->ird) |
1094                                          (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1095                                           0));
1096                mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1097                                          (p2p_type ==
1098                                           FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1099                                           MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1100                                           FW_RI_INIT_P2PTYPE_READ_REQ ?
1101                                           MPA_V2_RDMA_READ_RTR : 0) : 0));
1102                memcpy(mpa->private_data, &mpa_v2_params,
1103                       sizeof(struct mpa_v2_conn_params));
1104
1105                if (ep->plen)
1106                        memcpy(mpa->private_data +
1107                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1108        } else
1109                if (plen)
1110                        memcpy(mpa->private_data, pdata, plen);
1111
1112        /*
1113         * Reference the mpa skb again.  This ensures the data area
1114         * will remain in memory until the hw acks the tx.
1115         * Function fw4_ack() will deref it.
1116         */
1117        skb_get(skb);
1118        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1119        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1120        ep->mpa_skb = skb;
1121        ep->snd_seq += mpalen;
1122        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1123}
1124
1125static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1126{
1127        int mpalen, wrlen;
1128        struct fw_ofld_tx_data_wr *req;
1129        struct mpa_message *mpa;
1130        struct sk_buff *skb;
1131        struct mpa_v2_conn_params mpa_v2_params;
1132
1133        pr_debug("ep %p tid %u pd_len %d\n",
1134                 ep, ep->hwtid, ep->plen);
1135
1136        mpalen = sizeof(*mpa) + plen;
1137        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1138                mpalen += sizeof(struct mpa_v2_conn_params);
1139        wrlen = roundup(mpalen + sizeof(*req), 16);
1140
1141        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1142        if (!skb) {
1143                pr_err("%s - cannot alloc skb!\n", __func__);
1144                return -ENOMEM;
1145        }
1146        set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1147
1148        req = skb_put_zero(skb, wrlen);
1149        req->op_to_immdlen = cpu_to_be32(
1150                FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1151                FW_WR_COMPL_F |
1152                FW_WR_IMMDLEN_V(mpalen));
1153        req->flowid_len16 = cpu_to_be32(
1154                FW_WR_FLOWID_V(ep->hwtid) |
1155                FW_WR_LEN16_V(wrlen >> 4));
1156        req->plen = cpu_to_be32(mpalen);
1157        req->tunnel_to_proxy = cpu_to_be32(
1158                FW_OFLD_TX_DATA_WR_FLUSH_F |
1159                FW_OFLD_TX_DATA_WR_SHOVE_F);
1160
1161        mpa = (struct mpa_message *)(req + 1);
1162        memset(mpa, 0, sizeof(*mpa));
1163        memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1164        mpa->flags = 0;
1165        if (ep->mpa_attr.crc_enabled)
1166                mpa->flags |= MPA_CRC;
1167        if (ep->mpa_attr.recv_marker_enabled)
1168                mpa->flags |= MPA_MARKERS;
1169        mpa->revision = ep->mpa_attr.version;
1170        mpa->private_data_size = htons(plen);
1171
1172        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1173                mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1174                mpa->private_data_size =
1175                        htons(ntohs(mpa->private_data_size) +
1176                              sizeof(struct mpa_v2_conn_params));
1177                mpa_v2_params.ird = htons((u16)ep->ird);
1178                mpa_v2_params.ord = htons((u16)ep->ord);
1179                if (peer2peer && (ep->mpa_attr.p2p_type !=
1180                                        FW_RI_INIT_P2PTYPE_DISABLED)) {
1181                        mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1182
1183                        if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1184                                mpa_v2_params.ord |=
1185                                        htons(MPA_V2_RDMA_WRITE_RTR);
1186                        else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1187                                mpa_v2_params.ord |=
1188                                        htons(MPA_V2_RDMA_READ_RTR);
1189                }
1190
1191                memcpy(mpa->private_data, &mpa_v2_params,
1192                       sizeof(struct mpa_v2_conn_params));
1193
1194                if (ep->plen)
1195                        memcpy(mpa->private_data +
1196                               sizeof(struct mpa_v2_conn_params), pdata, plen);
1197        } else
1198                if (plen)
1199                        memcpy(mpa->private_data, pdata, plen);
1200
1201        /*
1202         * Reference the mpa skb.  This ensures the data area
1203         * will remain in memory until the hw acks the tx.
1204         * Function fw4_ack() will deref it.
1205         */
1206        skb_get(skb);
1207        t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1208        ep->mpa_skb = skb;
1209        __state_set(&ep->com, MPA_REP_SENT);
1210        ep->snd_seq += mpalen;
1211        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1212}
1213
1214static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1215{
1216        struct c4iw_ep *ep;
1217        struct cpl_act_establish *req = cplhdr(skb);
1218        unsigned short tcp_opt = ntohs(req->tcp_opt);
1219        unsigned int tid = GET_TID(req);
1220        unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1221        struct tid_info *t = dev->rdev.lldi.tids;
1222        int ret;
1223
1224        ep = lookup_atid(t, atid);
1225
1226        pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1227                 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1228
1229        mutex_lock(&ep->com.mutex);
1230        dst_confirm(ep->dst);
1231
1232        /* setup the hwtid for this connection */
1233        ep->hwtid = tid;
1234        cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1235        insert_ep_tid(ep);
1236
1237        ep->snd_seq = be32_to_cpu(req->snd_isn);
1238        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1239        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1240
1241        set_emss(ep, tcp_opt);
1242
1243        /* dealloc the atid */
1244        xa_erase_irq(&ep->com.dev->atids, atid);
1245        cxgb4_free_atid(t, atid);
1246        set_bit(ACT_ESTAB, &ep->com.history);
1247
1248        /* start MPA negotiation */
1249        ret = send_flowc(ep);
1250        if (ret)
1251                goto err;
1252        if (ep->retry_with_mpa_v1)
1253                ret = send_mpa_req(ep, skb, 1);
1254        else
1255                ret = send_mpa_req(ep, skb, mpa_rev);
1256        if (ret)
1257                goto err;
1258        mutex_unlock(&ep->com.mutex);
1259        return 0;
1260err:
1261        mutex_unlock(&ep->com.mutex);
1262        connect_reply_upcall(ep, -ENOMEM);
1263        c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1264        return 0;
1265}
1266
1267static void close_complete_upcall(struct c4iw_ep *ep, int status)
1268{
1269        struct iw_cm_event event;
1270
1271        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1272        memset(&event, 0, sizeof(event));
1273        event.event = IW_CM_EVENT_CLOSE;
1274        event.status = status;
1275        if (ep->com.cm_id) {
1276                pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1277                         ep, ep->com.cm_id, ep->hwtid);
1278                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1279                deref_cm_id(&ep->com);
1280                set_bit(CLOSE_UPCALL, &ep->com.history);
1281        }
1282}
1283
1284static void peer_close_upcall(struct c4iw_ep *ep)
1285{
1286        struct iw_cm_event event;
1287
1288        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1289        memset(&event, 0, sizeof(event));
1290        event.event = IW_CM_EVENT_DISCONNECT;
1291        if (ep->com.cm_id) {
1292                pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1293                         ep, ep->com.cm_id, ep->hwtid);
1294                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1295                set_bit(DISCONN_UPCALL, &ep->com.history);
1296        }
1297}
1298
1299static void peer_abort_upcall(struct c4iw_ep *ep)
1300{
1301        struct iw_cm_event event;
1302
1303        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1304        memset(&event, 0, sizeof(event));
1305        event.event = IW_CM_EVENT_CLOSE;
1306        event.status = -ECONNRESET;
1307        if (ep->com.cm_id) {
1308                pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1309                         ep->com.cm_id, ep->hwtid);
1310                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1311                deref_cm_id(&ep->com);
1312                set_bit(ABORT_UPCALL, &ep->com.history);
1313        }
1314}
1315
1316static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1317{
1318        struct iw_cm_event event;
1319
1320        pr_debug("ep %p tid %u status %d\n",
1321                 ep, ep->hwtid, status);
1322        memset(&event, 0, sizeof(event));
1323        event.event = IW_CM_EVENT_CONNECT_REPLY;
1324        event.status = status;
1325        memcpy(&event.local_addr, &ep->com.local_addr,
1326               sizeof(ep->com.local_addr));
1327        memcpy(&event.remote_addr, &ep->com.remote_addr,
1328               sizeof(ep->com.remote_addr));
1329
1330        if ((status == 0) || (status == -ECONNREFUSED)) {
1331                if (!ep->tried_with_mpa_v1) {
1332                        /* this means MPA_v2 is used */
1333                        event.ord = ep->ird;
1334                        event.ird = ep->ord;
1335                        event.private_data_len = ep->plen -
1336                                sizeof(struct mpa_v2_conn_params);
1337                        event.private_data = ep->mpa_pkt +
1338                                sizeof(struct mpa_message) +
1339                                sizeof(struct mpa_v2_conn_params);
1340                } else {
1341                        /* this means MPA_v1 is used */
1342                        event.ord = cur_max_read_depth(ep->com.dev);
1343                        event.ird = cur_max_read_depth(ep->com.dev);
1344                        event.private_data_len = ep->plen;
1345                        event.private_data = ep->mpa_pkt +
1346                                sizeof(struct mpa_message);
1347                }
1348        }
1349
1350        pr_debug("ep %p tid %u status %d\n", ep,
1351                 ep->hwtid, status);
1352        set_bit(CONN_RPL_UPCALL, &ep->com.history);
1353        ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1354
1355        if (status < 0)
1356                deref_cm_id(&ep->com);
1357}
1358
1359static int connect_request_upcall(struct c4iw_ep *ep)
1360{
1361        struct iw_cm_event event;
1362        int ret;
1363
1364        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1365        memset(&event, 0, sizeof(event));
1366        event.event = IW_CM_EVENT_CONNECT_REQUEST;
1367        memcpy(&event.local_addr, &ep->com.local_addr,
1368               sizeof(ep->com.local_addr));
1369        memcpy(&event.remote_addr, &ep->com.remote_addr,
1370               sizeof(ep->com.remote_addr));
1371        event.provider_data = ep;
1372        if (!ep->tried_with_mpa_v1) {
1373                /* this means MPA_v2 is used */
1374                event.ord = ep->ord;
1375                event.ird = ep->ird;
1376                event.private_data_len = ep->plen -
1377                        sizeof(struct mpa_v2_conn_params);
1378                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1379                        sizeof(struct mpa_v2_conn_params);
1380        } else {
1381                /* this means MPA_v1 is used. Send max supported */
1382                event.ord = cur_max_read_depth(ep->com.dev);
1383                event.ird = cur_max_read_depth(ep->com.dev);
1384                event.private_data_len = ep->plen;
1385                event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1386        }
1387        c4iw_get_ep(&ep->com);
1388        ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1389                                                      &event);
1390        if (ret)
1391                c4iw_put_ep(&ep->com);
1392        set_bit(CONNREQ_UPCALL, &ep->com.history);
1393        c4iw_put_ep(&ep->parent_ep->com);
1394        return ret;
1395}
1396
1397static void established_upcall(struct c4iw_ep *ep)
1398{
1399        struct iw_cm_event event;
1400
1401        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1402        memset(&event, 0, sizeof(event));
1403        event.event = IW_CM_EVENT_ESTABLISHED;
1404        event.ird = ep->ord;
1405        event.ord = ep->ird;
1406        if (ep->com.cm_id) {
1407                pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1408                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1409                set_bit(ESTAB_UPCALL, &ep->com.history);
1410        }
1411}
1412
1413static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1414{
1415        struct sk_buff *skb;
1416        u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1417        u32 credit_dack;
1418
1419        pr_debug("ep %p tid %u credits %u\n",
1420                 ep, ep->hwtid, credits);
1421        skb = get_skb(NULL, wrlen, GFP_KERNEL);
1422        if (!skb) {
1423                pr_err("update_rx_credits - cannot alloc skb!\n");
1424                return 0;
1425        }
1426
1427        /*
1428         * If we couldn't specify the entire rcv window at connection setup
1429         * due to the limit in the number of bits in the RCV_BUFSIZ field,
1430         * then add the overage into the credits returned.
1431         */
1432        if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1433                credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1434
1435        credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1436                      RX_DACK_MODE_V(dack_mode);
1437
1438        cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1439                            credit_dack);
1440
1441        c4iw_ofld_send(&ep->com.dev->rdev, skb);
1442        return credits;
1443}
1444
1445#define RELAXED_IRD_NEGOTIATION 1
1446
1447/*
1448 * process_mpa_reply - process streaming mode MPA reply
1449 *
1450 * Returns:
1451 *
1452 * 0 upon success indicating a connect request was delivered to the ULP
1453 * or the mpa request is incomplete but valid so far.
1454 *
1455 * 1 if a failure requires the caller to close the connection.
1456 *
1457 * 2 if a failure requires the caller to abort the connection.
1458 */
1459static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1460{
1461        struct mpa_message *mpa;
1462        struct mpa_v2_conn_params *mpa_v2_params;
1463        u16 plen;
1464        u16 resp_ird, resp_ord;
1465        u8 rtr_mismatch = 0, insuff_ird = 0;
1466        struct c4iw_qp_attributes attrs;
1467        enum c4iw_qp_attr_mask mask;
1468        int err;
1469        int disconnect = 0;
1470
1471        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1472
1473        /*
1474         * If we get more than the supported amount of private data
1475         * then we must fail this connection.
1476         */
1477        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1478                err = -EINVAL;
1479                goto err_stop_timer;
1480        }
1481
1482        /*
1483         * copy the new data into our accumulation buffer.
1484         */
1485        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1486                                  skb->len);
1487        ep->mpa_pkt_len += skb->len;
1488
1489        /*
1490         * if we don't even have the mpa message, then bail.
1491         */
1492        if (ep->mpa_pkt_len < sizeof(*mpa))
1493                return 0;
1494        mpa = (struct mpa_message *) ep->mpa_pkt;
1495
1496        /* Validate MPA header. */
1497        if (mpa->revision > mpa_rev) {
1498                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1499                       __func__, mpa_rev, mpa->revision);
1500                err = -EPROTO;
1501                goto err_stop_timer;
1502        }
1503        if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1504                err = -EPROTO;
1505                goto err_stop_timer;
1506        }
1507
1508        plen = ntohs(mpa->private_data_size);
1509
1510        /*
1511         * Fail if there's too much private data.
1512         */
1513        if (plen > MPA_MAX_PRIVATE_DATA) {
1514                err = -EPROTO;
1515                goto err_stop_timer;
1516        }
1517
1518        /*
1519         * If plen does not account for the packet size, fail.
1520         */
1521        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1522                err = -EPROTO;
1523                goto err_stop_timer;
1524        }
1525
1526        ep->plen = (u8) plen;
1527
1528        /*
1529         * If we don't have all the pdata yet, then bail.
1530         * We'll continue processing when more data arrives.
1531         */
1532        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1533                return 0;
1534
1535        if (mpa->flags & MPA_REJECT) {
1536                err = -ECONNREFUSED;
1537                goto err_stop_timer;
1538        }
1539
1540        /*
1541         * Stop mpa timer.  If it expired, then
1542         * we ignore the MPA reply.  process_timeout()
1543         * will abort the connection.
1544         */
1545        if (stop_ep_timer(ep))
1546                return 0;
1547
1548        /*
1549         * If we get here we have accumulated the entire mpa
1550         * start reply message including private data. And
1551         * the MPA header is valid.
1552         */
1553        __state_set(&ep->com, FPDU_MODE);
1554        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1555        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1556        ep->mpa_attr.version = mpa->revision;
1557        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1558
1559        if (mpa->revision == 2) {
1560                ep->mpa_attr.enhanced_rdma_conn =
1561                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1562                if (ep->mpa_attr.enhanced_rdma_conn) {
1563                        mpa_v2_params = (struct mpa_v2_conn_params *)
1564                                (ep->mpa_pkt + sizeof(*mpa));
1565                        resp_ird = ntohs(mpa_v2_params->ird) &
1566                                MPA_V2_IRD_ORD_MASK;
1567                        resp_ord = ntohs(mpa_v2_params->ord) &
1568                                MPA_V2_IRD_ORD_MASK;
1569                        pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1570                                 resp_ird, resp_ord, ep->ird, ep->ord);
1571
1572                        /*
1573                         * This is a double-check. Ideally, the checks below
1574                         * are not required since ird/ord negotiation has
1575                         * already been taken care of in c4iw_accept_cr().
1576                         */
1577                        if (ep->ird < resp_ord) {
1578                                if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1579                                    ep->com.dev->rdev.lldi.max_ordird_qp)
1580                                        ep->ird = resp_ord;
1581                                else
1582                                        insuff_ird = 1;
1583                        } else if (ep->ird > resp_ord) {
1584                                ep->ird = resp_ord;
1585                        }
1586                        if (ep->ord > resp_ird) {
1587                                if (RELAXED_IRD_NEGOTIATION)
1588                                        ep->ord = resp_ird;
1589                                else
1590                                        insuff_ird = 1;
1591                        }
1592                        if (insuff_ird) {
1593                                err = -ENOMEM;
1594                                ep->ird = resp_ord;
1595                                ep->ord = resp_ird;
1596                        }
1597
1598                        if (ntohs(mpa_v2_params->ird) &
1599                                        MPA_V2_PEER2PEER_MODEL) {
1600                                if (ntohs(mpa_v2_params->ord) &
1601                                                MPA_V2_RDMA_WRITE_RTR)
1602                                        ep->mpa_attr.p2p_type =
1603                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1604                                else if (ntohs(mpa_v2_params->ord) &
1605                                                MPA_V2_RDMA_READ_RTR)
1606                                        ep->mpa_attr.p2p_type =
1607                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1608                        }
1609                }
1610        } else if (mpa->revision == 1)
1611                if (peer2peer)
1612                        ep->mpa_attr.p2p_type = p2p_type;
1613
1614        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1615                 ep->mpa_attr.crc_enabled,
1616                 ep->mpa_attr.recv_marker_enabled,
1617                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1618                 ep->mpa_attr.p2p_type, p2p_type);
1619
1620        /*
1621         * If the responder's RTR does not match the initiator's, set
1622         * FW_RI_INIT_P2PTYPE_DISABLED in the mpa attributes so that no RTR
1623         * is generated when moving the QP to RTS state.
1624         * A TERM message will be sent after the QP has moved to RTS state.
1625         */
1626        if ((ep->mpa_attr.version == 2) && peer2peer &&
1627                        (ep->mpa_attr.p2p_type != p2p_type)) {
1628                ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1629                rtr_mismatch = 1;
1630        }
1631
1632        attrs.mpa_attr = ep->mpa_attr;
1633        attrs.max_ird = ep->ird;
1634        attrs.max_ord = ep->ord;
1635        attrs.llp_stream_handle = ep;
1636        attrs.next_state = C4IW_QP_STATE_RTS;
1637
1638        mask = C4IW_QP_ATTR_NEXT_STATE |
1639            C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1640            C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1641
1642        /* bind QP and TID with INIT_WR */
1643        err = c4iw_modify_qp(ep->com.qp->rhp,
1644                             ep->com.qp, mask, &attrs, 1);
1645        if (err)
1646                goto err;
1647
1648        /*
1649         * If the responder's RTR requirement did not match what the
1650         * initiator supports, generate a TERM message.
1651         */
1652        if (rtr_mismatch) {
1653                pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1654                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1655                attrs.ecode = MPA_NOMATCH_RTR;
1656                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1657                attrs.send_term = 1;
1658                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1659                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1660                err = -ENOMEM;
1661                disconnect = 1;
1662                goto out;
1663        }
1664
1665        /*
1666         * Generate a TERM if the initiator's IRD is not sufficient for the
1667         * responder-provided ORD. Currently, we behave the same way even
1668         * when the responder-provided IRD is insufficient for the
1669         * initiator's ORD.
1670         */
1671        if (insuff_ird) {
1672                pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1673                attrs.layer_etype = LAYER_MPA | DDP_LLP;
1674                attrs.ecode = MPA_INSUFF_IRD;
1675                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1676                attrs.send_term = 1;
1677                err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1678                                C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1679                err = -ENOMEM;
1680                disconnect = 1;
1681                goto out;
1682        }
1683        goto out;
1684err_stop_timer:
1685        stop_ep_timer(ep);
1686err:
1687        disconnect = 2;
1688out:
1689        connect_reply_upcall(ep, err);
1690        return disconnect;
1691}
1692
1693/*
1694 * process_mpa_request - process streaming mode MPA request
1695 *
1696 * Returns:
1697 *
1698 * 0 upon success indicating a connect request was delivered to the ULP
1699 * or the mpa request is incomplete but valid so far.
1700 *
1701 * 1 if a failure requires the caller to close the connection.
1702 *
1703 * 2 if a failure requires the caller to abort the connection.
1704 */
1705static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1706{
1707        struct mpa_message *mpa;
1708        struct mpa_v2_conn_params *mpa_v2_params;
1709        u16 plen;
1710
1711        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1712
1713        /*
1714         * If we get more than the supported amount of private data
1715         * then we must fail this connection.
1716         */
1717        if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1718                goto err_stop_timer;
1719
1720        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1721
1722        /*
1723         * Copy the new data into our accumulation buffer.
1724         */
1725        skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1726                                  skb->len);
1727        ep->mpa_pkt_len += skb->len;
1728
1729        /*
1730         * If we don't even have the mpa message, then bail.
1731         * We'll continue processing when more data arrives.
1732         */
1733        if (ep->mpa_pkt_len < sizeof(*mpa))
1734                return 0;
1735
1736        pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1737        mpa = (struct mpa_message *) ep->mpa_pkt;
1738
1739        /*
1740         * Validate MPA Header.
1741         */
1742        if (mpa->revision > mpa_rev) {
1743                pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1744                       __func__, mpa_rev, mpa->revision);
1745                goto err_stop_timer;
1746        }
1747
1748        if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1749                goto err_stop_timer;
1750
1751        plen = ntohs(mpa->private_data_size);
1752
1753        /*
1754         * Fail if there's too much private data.
1755         */
1756        if (plen > MPA_MAX_PRIVATE_DATA)
1757                goto err_stop_timer;
1758
1759        /*
1760         * If plen does not account for the packet size, fail.
1761         */
1762        if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1763                goto err_stop_timer;
1764        ep->plen = (u8) plen;
1765
1766        /*
1767         * If we don't have all the pdata yet, then bail.
1768         */
1769        if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1770                return 0;
1771
1772        /*
1773         * If we get here we have accumulated the entire mpa
1774         * start request message including private data.
1775         */
1776        ep->mpa_attr.initiator = 0;
1777        ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1778        ep->mpa_attr.recv_marker_enabled = markers_enabled;
1779        ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1780        ep->mpa_attr.version = mpa->revision;
1781        if (mpa->revision == 1)
1782                ep->tried_with_mpa_v1 = 1;
1783        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1784
1785        if (mpa->revision == 2) {
1786                ep->mpa_attr.enhanced_rdma_conn =
1787                        mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1788                if (ep->mpa_attr.enhanced_rdma_conn) {
1789                        mpa_v2_params = (struct mpa_v2_conn_params *)
1790                                (ep->mpa_pkt + sizeof(*mpa));
1791                        ep->ird = ntohs(mpa_v2_params->ird) &
1792                                MPA_V2_IRD_ORD_MASK;
1793                        ep->ird = min_t(u32, ep->ird,
1794                                        cur_max_read_depth(ep->com.dev));
1795                        ep->ord = ntohs(mpa_v2_params->ord) &
1796                                MPA_V2_IRD_ORD_MASK;
1797                        ep->ord = min_t(u32, ep->ord,
1798                                        cur_max_read_depth(ep->com.dev));
1799                        pr_debug("initiator ird %u ord %u\n",
1800                                 ep->ird, ep->ord);
1801                        if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1802                                if (peer2peer) {
1803                                        if (ntohs(mpa_v2_params->ord) &
1804                                                        MPA_V2_RDMA_WRITE_RTR)
1805                                                ep->mpa_attr.p2p_type =
1806                                                FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1807                                        else if (ntohs(mpa_v2_params->ord) &
1808                                                        MPA_V2_RDMA_READ_RTR)
1809                                                ep->mpa_attr.p2p_type =
1810                                                FW_RI_INIT_P2PTYPE_READ_REQ;
1811                                }
1812                }
1813        } else if (mpa->revision == 1)
1814                if (peer2peer)
1815                        ep->mpa_attr.p2p_type = p2p_type;
1816
1817        pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1818                 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1819                 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1820                 ep->mpa_attr.p2p_type);
1821
1822        __state_set(&ep->com, MPA_REQ_RCVD);
1823
1824        /* drive upcall */
1825        mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1826        if (ep->parent_ep->com.state != DEAD) {
1827                if (connect_request_upcall(ep))
1828                        goto err_unlock_parent;
1829        } else {
1830                goto err_unlock_parent;
1831        }
1832        mutex_unlock(&ep->parent_ep->com.mutex);
1833        return 0;
1834
1835err_unlock_parent:
1836        mutex_unlock(&ep->parent_ep->com.mutex);
1837        goto err_out;
1838err_stop_timer:
1839        (void)stop_ep_timer(ep);
1840err_out:
1841        return 2;
1842}
1843
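    /*
     * Process an incoming CPL_RX_DATA message: streaming-mode payload on a
     * connection.  During MPA negotiation the data is fed to the MPA
     * request/reply parsers; unexpected streaming data in FPDU_MODE moves
     * the QP to TERMINATE and disconnects.
     */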
1844static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1845{
1846        struct c4iw_ep *ep;
1847        struct cpl_rx_data *hdr = cplhdr(skb);
1848        unsigned int dlen = ntohs(hdr->len);
1849        unsigned int tid = GET_TID(hdr);
1850        __u8 status = hdr->status;
1851        int disconnect = 0;
1852
1853        ep = get_ep_from_tid(dev, tid);
1854        if (!ep)
1855                return 0;
1856        pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1857        skb_pull(skb, sizeof(*hdr));
1858        skb_trim(skb, dlen);
1859        mutex_lock(&ep->com.mutex);
1860
1861        switch (ep->com.state) {
1862        case MPA_REQ_SENT:
1863                update_rx_credits(ep, dlen);
1864                ep->rcv_seq += dlen;
1865                disconnect = process_mpa_reply(ep, skb);
1866                break;
1867        case MPA_REQ_WAIT:
1868                update_rx_credits(ep, dlen);
1869                ep->rcv_seq += dlen;
1870                disconnect = process_mpa_request(ep, skb);
1871                break;
1872        case FPDU_MODE: {
1873                struct c4iw_qp_attributes attrs;
1874
1875                update_rx_credits(ep, dlen);
1876                if (status)
1877                        pr_err("%s Unexpected streaming data." \
1878                               " qpid %u ep %p state %d tid %u status %d\n",
1879                               __func__, ep->com.qp->wq.sq.qid, ep,
1880                               ep->com.state, ep->hwtid, status);
1881                attrs.next_state = C4IW_QP_STATE_TERMINATE;
1882                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1883                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1884                disconnect = 1;
1885                break;
1886        }
1887        default:
1888                break;
1889        }
1890        mutex_unlock(&ep->com.mutex);
1891        if (disconnect)
1892                c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1893        c4iw_put_ep(&ep->com);
1894        return 0;
1895}
1896
1897static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1898{
1899        enum chip_type adapter_type;
1900
1901        adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1902
1903        /*
1904         * If this TCB had a srq buffer cached, then we must complete
1905         * it. For user mode, that means saving the srqidx in the
1906         * user/kernel status page for this qp.  For kernel mode, just
1907         * synthesize the CQE now.
1908         */
1909        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1910                if (ep->com.qp->ibqp.uobject)
1911                        t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1912                else
1913                        c4iw_flush_srqidx(ep->com.qp, srqidx);
1914        }
1915}
1916
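    /*
     * Process the reply to an abort request we issued.  Complete any cached
     * SRQ buffer, then move an ABORTING endpoint to DEAD and release its
     * resources.
     */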
1917static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1918{
1919        u32 srqidx;
1920        struct c4iw_ep *ep;
1921        struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1922        int release = 0;
1923        unsigned int tid = GET_TID(rpl);
1924
1925        ep = get_ep_from_tid(dev, tid);
1926        if (!ep) {
1927                pr_warn("Abort rpl to freed endpoint\n");
1928                return 0;
1929        }
1930
1931        if (ep->com.qp && ep->com.qp->srq) {
1932                srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1933                complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1934        }
1935
1936        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1937        mutex_lock(&ep->com.mutex);
1938        switch (ep->com.state) {
1939        case ABORTING:
1940                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1941                __state_set(&ep->com, DEAD);
1942                release = 1;
1943                break;
1944        default:
1945                pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1946                break;
1947        }
1948        mutex_unlock(&ep->com.mutex);
1949
1950        if (release) {
1951                close_complete_upcall(ep, -ECONNRESET);
1952                release_ep_resources(ep);
1953        }
1954        c4iw_put_ep(&ep->com);
1955        return 0;
1956}
1957
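    /*
     * Retry an active open through the firmware using a
     * FW_OFLD_CONNECTION_WR.  Used from act_open_rpl() when the normal
     * active-open path fails with CPL_ERR_TCAM_FULL.
     */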
1958static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1959{
1960        struct sk_buff *skb;
1961        struct fw_ofld_connection_wr *req;
1962        unsigned int mtu_idx;
1963        u32 wscale;
1964        struct sockaddr_in *sin;
1965        int win;
1966
1967        skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1968        req = __skb_put_zero(skb, sizeof(*req));
1969        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1970        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1971        req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1972                                     ep->com.dev->rdev.lldi.ports[0],
1973                                     ep->l2t));
1974        sin = (struct sockaddr_in *)&ep->com.local_addr;
1975        req->le.lport = sin->sin_port;
1976        req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1977        sin = (struct sockaddr_in *)&ep->com.remote_addr;
1978        req->le.pport = sin->sin_port;
1979        req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1980        req->tcb.t_state_to_astid =
1981                        htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1982                        FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1983        req->tcb.cplrxdataack_cplpassacceptrpl =
1984                        htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1985        req->tcb.tx_max = (__force __be32) jiffies;
1986        req->tcb.rcv_adv = htons(1);
1987        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1988                      enable_tcp_timestamps,
1989                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1990        wscale = cxgb_compute_wscale(rcv_win);
1991
1992        /*
1993         * Specify the largest window that will fit in opt0. The
1994         * remainder will be specified in the rx_data_ack.
1995         */
1996        win = ep->rcv_win >> 10;
1997        if (win > RCV_BUFSIZ_M)
1998                win = RCV_BUFSIZ_M;
1999
2000        req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2001                (nocong ? NO_CONG_F : 0) |
2002                KEEP_ALIVE_F |
2003                DELACK_F |
2004                WND_SCALE_V(wscale) |
2005                MSS_IDX_V(mtu_idx) |
2006                L2T_IDX_V(ep->l2t->idx) |
2007                TX_CHAN_V(ep->tx_chan) |
2008                SMAC_SEL_V(ep->smac_idx) |
2009                DSCP_V(ep->tos >> 2) |
2010                ULP_MODE_V(ULP_MODE_TCPDDP) |
2011                RCV_BUFSIZ_V(win));
2012        req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2013                TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2014                RX_CHANNEL_V(0) |
2015                CCTRL_ECN_V(enable_ecn) |
2016                RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2017        if (enable_tcp_timestamps)
2018                req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2019        if (enable_tcp_sack)
2020                req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2021        if (wscale && enable_tcp_window_scaling)
2022                req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2023        req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2024        req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2025        set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2026        set_bit(ACT_OFLD_CONN, &ep->com.history);
2027        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2028}
2029
2030/*
2031 * Some of the error codes above implicitly indicate that there is no TID
2032 * allocated with the result of an ACT_OPEN.  We use this predicate to make
2033 * that explicit.
2034 */
2035static inline int act_open_has_tid(int status)
2036{
2037        return (status != CPL_ERR_TCAM_PARITY &&
2038                status != CPL_ERR_TCAM_MISS &&
2039                status != CPL_ERR_TCAM_FULL &&
2040                status != CPL_ERR_CONN_EXIST_SYNRECV &&
2041                status != CPL_ERR_CONN_EXIST);
2042}
2043
2044static char *neg_adv_str(unsigned int status)
2045{
2046        switch (status) {
2047        case CPL_ERR_RTX_NEG_ADVICE:
2048                return "Retransmit timeout";
2049        case CPL_ERR_PERSIST_NEG_ADVICE:
2050                return "Persist timeout";
2051        case CPL_ERR_KEEPALV_NEG_ADVICE:
2052                return "Keepalive timeout";
2053        default:
2054                return "Unknown";
2055        }
2056}
2057
2058static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2059{
2060        ep->snd_win = snd_win;
2061        ep->rcv_win = rcv_win;
2062        pr_debug("snd_win %d rcv_win %d\n",
2063                 ep->snd_win, ep->rcv_win);
2064}
2065
2066#define ACT_OPEN_RETRY_COUNT 2
2067
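    /*
     * Resolve the neighbour for the destination and fill in the endpoint's
     * L2T entry, MTU, tx channel, queue indices and TCP window sizes.
     * Loopback destinations are resolved against the matching local netdev.
     */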
2068static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2069                     struct dst_entry *dst, struct c4iw_dev *cdev,
2070                     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2071{
2072        struct neighbour *n;
2073        int err, step;
2074        struct net_device *pdev;
2075
2076        n = dst_neigh_lookup(dst, peer_ip);
2077        if (!n)
2078                return -ENODEV;
2079
2080        rcu_read_lock();
2081        err = -ENOMEM;
2082        if (n->dev->flags & IFF_LOOPBACK) {
2083                if (iptype == 4)
2084                        pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2085                else if (IS_ENABLED(CONFIG_IPV6))
2086                        for_each_netdev(&init_net, pdev) {
2087                                if (ipv6_chk_addr(&init_net,
2088                                                  (struct in6_addr *)peer_ip,
2089                                                  pdev, 1))
2090                                        break;
2091                        }
2092                else
2093                        pdev = NULL;
2094
2095                if (!pdev) {
2096                        err = -ENODEV;
2097                        goto out;
2098                }
2099                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2100                                        n, pdev, rt_tos2priority(tos));
2101                if (!ep->l2t) {
2102                        dev_put(pdev);
2103                        goto out;
2104                }
2105                ep->mtu = pdev->mtu;
2106                ep->tx_chan = cxgb4_port_chan(pdev);
2107                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2108                step = cdev->rdev.lldi.ntxq /
2109                        cdev->rdev.lldi.nchan;
2110                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2111                step = cdev->rdev.lldi.nrxq /
2112                        cdev->rdev.lldi.nchan;
2113                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2114                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2115                        cxgb4_port_idx(pdev) * step];
2116                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2117                dev_put(pdev);
2118        } else {
2119                pdev = get_real_dev(n->dev);
2120                ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2121                                        n, pdev, rt_tos2priority(tos));
2122                if (!ep->l2t)
2123                        goto out;
2124                ep->mtu = dst_mtu(dst);
2125                ep->tx_chan = cxgb4_port_chan(pdev);
2126                ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2127                step = cdev->rdev.lldi.ntxq /
2128                        cdev->rdev.lldi.nchan;
2129                ep->txq_idx = cxgb4_port_idx(pdev) * step;
2130                ep->ctrlq_idx = cxgb4_port_idx(pdev);
2131                step = cdev->rdev.lldi.nrxq /
2132                        cdev->rdev.lldi.nchan;
2133                ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2134                        cxgb4_port_idx(pdev) * step];
2135                set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2136
2137                if (clear_mpa_v1) {
2138                        ep->retry_with_mpa_v1 = 0;
2139                        ep->tried_with_mpa_v1 = 0;
2140                }
2141        }
2142        err = 0;
2143out:
2144        rcu_read_unlock();
2145
2146        neigh_release(n);
2147
2148        return err;
2149}
2150
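    /*
     * Retry an active connect on the same endpoint: top up the ep skb list,
     * allocate a fresh atid, re-resolve the route and L2T entry and resend
     * the connect request.  On failure the original connect is failed with
     * -ECONNRESET.
     */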
2151static int c4iw_reconnect(struct c4iw_ep *ep)
2152{
2153        int err = 0;
2154        int size = 0;
2155        struct sockaddr_in *laddr = (struct sockaddr_in *)
2156                                    &ep->com.cm_id->m_local_addr;
2157        struct sockaddr_in *raddr = (struct sockaddr_in *)
2158                                    &ep->com.cm_id->m_remote_addr;
2159        struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2160                                      &ep->com.cm_id->m_local_addr;
2161        struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2162                                      &ep->com.cm_id->m_remote_addr;
2163        int iptype;
2164        __u8 *ra;
2165
2166        pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2167        c4iw_init_wr_wait(ep->com.wr_waitp);
2168
2169        /* When the MPA revision differs between the nodes, the node with
2170         * MPA_rev=2 retries the connection with MPA_rev 1 on the same EP
2171         * through c4iw_reconnect(), where the EP is assigned a new tid for
2172         * the new connection attempt. Because the same EP is reused, some
2173         * skbs were consumed by the previous c4iw_connect(), leaving the EP
2174         * with too few skbs for the reconnect and causing a crash due to an
2175         * empty skb list during peer_abort(). Re-allocate the skbs that
2176         * were already used.
2177         */
2178        size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2179        if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2180                err = -ENOMEM;
2181                goto fail1;
2182        }
2183
2184        /*
2185         * Allocate an active TID to initiate a TCP connection.
2186         */
2187        ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2188        if (ep->atid == -1) {
2189                pr_err("%s - cannot alloc atid\n", __func__);
2190                err = -ENOMEM;
2191                goto fail2;
2192        }
2193        err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2194        if (err)
2195                goto fail2a;
2196
2197        /* find a route */
2198        if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2199                ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2200                                          laddr->sin_addr.s_addr,
2201                                          raddr->sin_addr.s_addr,
2202                                          laddr->sin_port,
2203                                          raddr->sin_port, ep->com.cm_id->tos);
2204                iptype = 4;
2205                ra = (__u8 *)&raddr->sin_addr;
2206        } else {
2207                ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2208                                           get_real_dev,
2209                                           laddr6->sin6_addr.s6_addr,
2210                                           raddr6->sin6_addr.s6_addr,
2211                                           laddr6->sin6_port,
2212                                           raddr6->sin6_port,
2213                                           ep->com.cm_id->tos,
2214                                           raddr6->sin6_scope_id);
2215                iptype = 6;
2216                ra = (__u8 *)&raddr6->sin6_addr;
2217        }
2218        if (!ep->dst) {
2219                pr_err("%s - cannot find route\n", __func__);
2220                err = -EHOSTUNREACH;
2221                goto fail3;
2222        }
2223        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2224                        ep->com.dev->rdev.lldi.adapter_type,
2225                        ep->com.cm_id->tos);
2226        if (err) {
2227                pr_err("%s - cannot alloc l2e\n", __func__);
2228                goto fail4;
2229        }
2230
2231        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2232                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2233                 ep->l2t->idx);
2234
2235        state_set(&ep->com, CONNECTING);
2236        ep->tos = ep->com.cm_id->tos;
2237
2238        /* send connect request to rnic */
2239        err = send_connect(ep);
2240        if (!err)
2241                goto out;
2242
2243        cxgb4_l2t_release(ep->l2t);
2244fail4:
2245        dst_release(ep->dst);
2246fail3:
2247        xa_erase_irq(&ep->com.dev->atids, ep->atid);
2248fail2a:
2249        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2250fail2:
2251        /*
2252         * Remember to send a notification to the upper layer.
2253         * The upper layer is not aware that this is a re-connect attempt,
2254         * and so it is still waiting for the response to the first connect
2255         * request.
2256         */
2257        connect_reply_upcall(ep, -ECONNRESET);
2258fail1:
2259        c4iw_put_ep(&ep->com);
2260out:
2261        return err;
2262}
2263
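    /*
     * Process a CPL_ACT_OPEN_RPL: the result of an active open.  On
     * CPL_ERR_TCAM_FULL the open may be retried through the firmware, and on
     * CPL_ERR_CONN_EXIST it is retried via c4iw_reconnect().  Other failures
     * are reported to the ULP and the atid, route and L2T entry are released.
     */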
2264static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2265{
2266        struct c4iw_ep *ep;
2267        struct cpl_act_open_rpl *rpl = cplhdr(skb);
2268        unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2269                                      ntohl(rpl->atid_status)));
2270        struct tid_info *t = dev->rdev.lldi.tids;
2271        int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2272        struct sockaddr_in *la;
2273        struct sockaddr_in *ra;
2274        struct sockaddr_in6 *la6;
2275        struct sockaddr_in6 *ra6;
2276        int ret = 0;
2277
2278        ep = lookup_atid(t, atid);
2279        la = (struct sockaddr_in *)&ep->com.local_addr;
2280        ra = (struct sockaddr_in *)&ep->com.remote_addr;
2281        la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2282        ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2283
2284        pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2285                 status, status2errno(status));
2286
2287        if (cxgb_is_neg_adv(status)) {
2288                pr_debug("Connection problems for atid %u status %u (%s)\n",
2289                         atid, status, neg_adv_str(status));
2290                ep->stats.connect_neg_adv++;
2291                mutex_lock(&dev->rdev.stats.lock);
2292                dev->rdev.stats.neg_adv++;
2293                mutex_unlock(&dev->rdev.stats.lock);
2294                return 0;
2295        }
2296
2297        set_bit(ACT_OPEN_RPL, &ep->com.history);
2298
2299        /*
2300         * Log interesting failures.
2301         */
2302        switch (status) {
2303        case CPL_ERR_CONN_RESET:
2304        case CPL_ERR_CONN_TIMEDOUT:
2305                break;
2306        case CPL_ERR_TCAM_FULL:
2307                mutex_lock(&dev->rdev.stats.lock);
2308                dev->rdev.stats.tcam_full++;
2309                mutex_unlock(&dev->rdev.stats.lock);
2310                if (ep->com.local_addr.ss_family == AF_INET &&
2311                    dev->rdev.lldi.enable_fw_ofld_conn) {
2312                        ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2313                                                   ntohl(rpl->atid_status))));
2314                        if (ret)
2315                                goto fail;
2316                        return 0;
2317                }
2318                break;
2319        case CPL_ERR_CONN_EXIST:
2320                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2321                        set_bit(ACT_RETRY_INUSE, &ep->com.history);
2322                        if (ep->com.remote_addr.ss_family == AF_INET6) {
2323                                struct sockaddr_in6 *sin6 =
2324                                                (struct sockaddr_in6 *)
2325                                                &ep->com.local_addr;
2326                                cxgb4_clip_release(
2327                                                ep->com.dev->rdev.lldi.ports[0],
2328                                                (const u32 *)
2329                                                &sin6->sin6_addr.s6_addr, 1);
2330                        }
2331                        xa_erase_irq(&ep->com.dev->atids, atid);
2332                        cxgb4_free_atid(t, atid);
2333                        dst_release(ep->dst);
2334                        cxgb4_l2t_release(ep->l2t);
2335                        c4iw_reconnect(ep);
2336                        return 0;
2337                }
2338                break;
2339        default:
2340                if (ep->com.local_addr.ss_family == AF_INET) {
2341                        pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2342                                atid, status, status2errno(status),
2343                                &la->sin_addr.s_addr, ntohs(la->sin_port),
2344                                &ra->sin_addr.s_addr, ntohs(ra->sin_port));
2345                } else {
2346                        pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2347                                atid, status, status2errno(status),
2348                                la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2349                                ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2350                }
2351                break;
2352        }
2353
2354fail:
2355        connect_reply_upcall(ep, status2errno(status));
2356        state_set(&ep->com, DEAD);
2357
2358        if (ep->com.remote_addr.ss_family == AF_INET6) {
2359                struct sockaddr_in6 *sin6 =
2360                        (struct sockaddr_in6 *)&ep->com.local_addr;
2361                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2362                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2363        }
2364        if (status && act_open_has_tid(status))
2365                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2366                                 ep->com.local_addr.ss_family);
2367
2368        xa_erase_irq(&ep->com.dev->atids, atid);
2369        cxgb4_free_atid(t, atid);
2370        dst_release(ep->dst);
2371        cxgb4_l2t_release(ep->l2t);
2372        c4iw_put_ep(&ep->com);
2373
2374        return 0;
2375}
2376
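    /*
     * Process a CPL_PASS_OPEN_RPL: completion of a listen (passive open)
     * request.  Wake up the thread waiting for the listen to complete.
     */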
2377static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2378{
2379        struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2380        unsigned int stid = GET_TID(rpl);
2381        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2382
2383        if (!ep) {
2384                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2385                goto out;
2386        }
2387        pr_debug("ep %p status %d error %d\n", ep,
2388                 rpl->status, status2errno(rpl->status));
2389        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2390        c4iw_put_ep(&ep->com);
2391out:
2392        return 0;
2393}
2394
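    /*
     * Process the reply to a close-listening-server request.  Wake up the
     * thread waiting for the listener to be destroyed.
     */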
2395static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2396{
2397        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2398        unsigned int stid = GET_TID(rpl);
2399        struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2400
2401        if (!ep) {
2402                pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2403                goto out;
2404        }
2405        pr_debug("ep %p\n", ep);
2406        c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2407        c4iw_put_ep(&ep->com);
2408out:
2409        return 0;
2410}
2411
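    /*
     * Build and send the CPL_PASS_ACCEPT_RPL that accepts an incoming
     * connection request, encoding the MSS index, window scale, ECN and RSS
     * queue settings in opt0/opt2.
     */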
2412static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2413                     struct cpl_pass_accept_req *req)
2414{
2415        struct cpl_pass_accept_rpl *rpl;
2416        unsigned int mtu_idx;
2417        u64 opt0;
2418        u32 opt2;
2419        u32 wscale;
2420        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2421        int win;
2422        enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2423
2424        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2425        cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2426                      enable_tcp_timestamps && req->tcpopt.tstamp,
2427                      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2428        wscale = cxgb_compute_wscale(rcv_win);
2429
2430        /*
2431         * Specify the largest window that will fit in opt0. The
2432         * remainder will be specified in the rx_data_ack.
2433         */
2434        win = ep->rcv_win >> 10;
2435        if (win > RCV_BUFSIZ_M)
2436                win = RCV_BUFSIZ_M;
2437        opt0 = (nocong ? NO_CONG_F : 0) |
2438               KEEP_ALIVE_F |
2439               DELACK_F |
2440               WND_SCALE_V(wscale) |
2441               MSS_IDX_V(mtu_idx) |
2442               L2T_IDX_V(ep->l2t->idx) |
2443               TX_CHAN_V(ep->tx_chan) |
2444               SMAC_SEL_V(ep->smac_idx) |
2445               DSCP_V(ep->tos >> 2) |
2446               ULP_MODE_V(ULP_MODE_TCPDDP) |
2447               RCV_BUFSIZ_V(win);
2448        opt2 = RX_CHANNEL_V(0) |
2449               RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2450
2451        if (enable_tcp_timestamps && req->tcpopt.tstamp)
2452                opt2 |= TSTAMPS_EN_F;
2453        if (enable_tcp_sack && req->tcpopt.sack)
2454                opt2 |= SACK_EN_F;
2455        if (wscale && enable_tcp_window_scaling)
2456                opt2 |= WND_SCALE_EN_F;
2457        if (enable_ecn) {
2458                const struct tcphdr *tcph;
2459                u32 hlen = ntohl(req->hdr_len);
2460
2461                if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2462                        tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2463                                IP_HDR_LEN_G(hlen);
2464                else
2465                        tcph = (const void *)(req + 1) +
2466                                T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2467                if (tcph->ece && tcph->cwr)
2468                        opt2 |= CCTRL_ECN_V(1);
2469        }
2470
2471        skb_get(skb);
2472        rpl = cplhdr(skb);
2473        if (!is_t4(adapter_type)) {
2474                skb_trim(skb, roundup(sizeof(*rpl5), 16));
2475                rpl5 = (void *)rpl;
2476                INIT_TP_WR(rpl5, ep->hwtid);
2477        } else {
2478                skb_trim(skb, sizeof(*rpl));
2479                INIT_TP_WR(rpl, ep->hwtid);
2480        }
2481        OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2482                                                    ep->hwtid));
2483
2484        if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2485                u32 isn = (prandom_u32() & ~7UL) - 1;
2486                opt2 |= T5_OPT_2_VALID_F;
2487                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2488                opt2 |= T5_ISS_F;
2489                rpl5 = (void *)rpl;
2490                memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2491                if (peer2peer)
2492                        isn += 4;
2493                rpl5->iss = cpu_to_be32(isn);
2494                pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2495        }
2496
2497        rpl->opt0 = cpu_to_be64(opt0);
2498        rpl->opt2 = cpu_to_be32(opt2);
2499        set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2500        t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2501
2502        return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2503}
2504
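    /*
     * Reject an incoming connection request by releasing its hardware tid.
     */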
2505static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2506{
2507        pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2508        skb_trim(skb, sizeof(struct cpl_tid_release));
2509        release_tid(&dev->rdev, hwtid, skb);
2510        return;
2511}
2512
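    /*
     * Process a CPL_PASS_ACCEPT_REQ: an incoming SYN matched one of our
     * listening endpoints.  Allocate a child endpoint, resolve the route and
     * L2T entry and send the accept reply, or release the tid on any failure.
     */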
2513static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2514{
2515        struct c4iw_ep *child_ep = NULL, *parent_ep;
2516        struct cpl_pass_accept_req *req = cplhdr(skb);
2517        unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2518        struct tid_info *t = dev->rdev.lldi.tids;
2519        unsigned int hwtid = GET_TID(req);
2520        struct dst_entry *dst;
2521        __u8 local_ip[16], peer_ip[16];
2522        __be16 local_port, peer_port;
2523        struct sockaddr_in6 *sin6;
2524        int err;
2525        u16 peer_mss = ntohs(req->tcpopt.mss);
2526        int iptype;
2527        unsigned short hdrs;
2528        u8 tos;
2529
2530        parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2531        if (!parent_ep) {
2532                pr_err("%s connect request on invalid stid %d\n",
2533                       __func__, stid);
2534                goto reject;
2535        }
2536
2537        if (state_read(&parent_ep->com) != LISTEN) {
2538                pr_err("%s - listening ep not in LISTEN\n", __func__);
2539                goto reject;
2540        }
2541
2542        if (parent_ep->com.cm_id->tos_set)
2543                tos = parent_ep->com.cm_id->tos;
2544        else
2545                tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2546
2547        cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2548                        &iptype, local_ip, peer_ip, &local_port, &peer_port);
2549
2550        /* Find output route */
2551        if (iptype == 4)  {
2552                pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
2553                         , parent_ep, hwtid,
2554                         local_ip, peer_ip, ntohs(local_port),
2555                         ntohs(peer_port), peer_mss);
2556                dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2557                                      *(__be32 *)local_ip, *(__be32 *)peer_ip,
2558                                      local_port, peer_port, tos);
2559        } else {
2560                pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
2561                         , parent_ep, hwtid,
2562                         local_ip, peer_ip, ntohs(local_port),
2563                         ntohs(peer_port), peer_mss);
2564                dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2565                                local_ip, peer_ip, local_port, peer_port,
2566                                tos,
2567                                ((struct sockaddr_in6 *)
2568                                 &parent_ep->com.local_addr)->sin6_scope_id);
2569        }
2570        if (!dst) {
2571                pr_err("%s - failed to find dst entry!\n", __func__);
2572                goto reject;
2573        }
2574
2575        child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2576        if (!child_ep) {
2577                pr_err("%s - failed to allocate ep entry!\n", __func__);
2578                dst_release(dst);
2579                goto reject;
2580        }
2581
2582        err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2583                        parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2584        if (err) {
2585                pr_err("%s - failed to allocate l2t entry!\n", __func__);
2586                dst_release(dst);
2587                kfree(child_ep);
2588                goto reject;
2589        }
2590
2591        hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2592               sizeof(struct tcphdr) +
2593               ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2594        if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2595                child_ep->mtu = peer_mss + hdrs;
2596
2597        skb_queue_head_init(&child_ep->com.ep_skb_list);
2598        if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2599                goto fail;
2600
2601        state_set(&child_ep->com, CONNECTING);
2602        child_ep->com.dev = dev;
2603        child_ep->com.cm_id = NULL;
2604
2605        if (iptype == 4) {
2606                struct sockaddr_in *sin = (struct sockaddr_in *)
2607                        &child_ep->com.local_addr;
2608
2609                sin->sin_family = AF_INET;
2610                sin->sin_port = local_port;
2611                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2612
2613                sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2614                sin->sin_family = AF_INET;
2615                sin->sin_port = ((struct sockaddr_in *)
2616                                 &parent_ep->com.local_addr)->sin_port;
2617                sin->sin_addr.s_addr = *(__be32 *)local_ip;
2618
2619                sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2620                sin->sin_family = AF_INET;
2621                sin->sin_port = peer_port;
2622                sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2623        } else {
2624                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2625                sin6->sin6_family = PF_INET6;
2626                sin6->sin6_port = local_port;
2627                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2628
2629                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2630                sin6->sin6_family = PF_INET6;
2631                sin6->sin6_port = ((struct sockaddr_in6 *)
2632                                   &parent_ep->com.local_addr)->sin6_port;
2633                memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2634
2635                sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2636                sin6->sin6_family = PF_INET6;
2637                sin6->sin6_port = peer_port;
2638                memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2639        }
2640
2641        c4iw_get_ep(&parent_ep->com);
2642        child_ep->parent_ep = parent_ep;
2643        child_ep->tos = tos;
2644        child_ep->dst = dst;
2645        child_ep->hwtid = hwtid;
2646
2647        pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2648                 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2649
2650        timer_setup(&child_ep->timer, ep_timeout, 0);
2651        cxgb4_insert_tid(t, child_ep, hwtid,
2652                         child_ep->com.local_addr.ss_family);
2653        insert_ep_tid(child_ep);
2654        if (accept_cr(child_ep, skb, req)) {
2655                c4iw_put_ep(&parent_ep->com);
2656                release_ep_resources(child_ep);
2657        } else {
2658                set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2659        }
2660        if (iptype == 6) {
2661                sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2662                cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2663                               (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2664        }
2665        goto out;
2666fail:
2667        c4iw_put_ep(&child_ep->com);
2668reject:
2669        reject_cr(dev, hwtid, skb);
2670out:
2671        if (parent_ep)
2672                c4iw_put_ep(&parent_ep->com);
2673        return 0;
2674}
2675
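    /*
     * Process a CPL_PASS_ESTABLISH: the passive-side TCP connection is now
     * established.  Record the initial sequence numbers, start the MPA
     * request timer and send the flowc work request.
     */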
2676static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2677{
2678        struct c4iw_ep *ep;
2679        struct cpl_pass_establish *req = cplhdr(skb);
2680        unsigned int tid = GET_TID(req);
2681        int ret;
2682        u16 tcp_opt = ntohs(req->tcp_opt);
2683
2684        ep = get_ep_from_tid(dev, tid);
2685        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2686        ep->snd_seq = be32_to_cpu(req->snd_isn);
2687        ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2688        ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2689
2690        pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2691
2692        set_emss(ep, tcp_opt);
2693
2694        dst_confirm(ep->dst);
2695        mutex_lock(&ep->com.mutex);
2696        ep->com.state = MPA_REQ_WAIT;
2697        start_ep_timer(ep);
2698        set_bit(PASS_ESTAB, &ep->com.history);
2699        ret = send_flowc(ep);
2700        mutex_unlock(&ep->com.mutex);
2701        if (ret)
2702                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2703        c4iw_put_ep(&ep->com);
2704
2705        return 0;
2706}
2707
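    /*
     * Process a CPL_PEER_CLOSE: the peer has sent a FIN.  Drive the endpoint
     * state machine towards CLOSING/MORIBUND/DEAD and notify the ULP as
     * appropriate for the current state.
     */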
2708static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2709{
2710        struct cpl_peer_close *hdr = cplhdr(skb);
2711        struct c4iw_ep *ep;
2712        struct c4iw_qp_attributes attrs;
2713        int disconnect = 1;
2714        int release = 0;
2715        unsigned int tid = GET_TID(hdr);
2716        int ret;
2717
2718        ep = get_ep_from_tid(dev, tid);
2719        if (!ep)
2720                return 0;
2721
2722        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2723        dst_confirm(ep->dst);
2724
2725        set_bit(PEER_CLOSE, &ep->com.history);
2726        mutex_lock(&ep->com.mutex);
2727        switch (ep->com.state) {
2728        case MPA_REQ_WAIT:
2729                __state_set(&ep->com, CLOSING);
2730                break;
2731        case MPA_REQ_SENT:
2732                __state_set(&ep->com, CLOSING);
2733                connect_reply_upcall(ep, -ECONNRESET);
2734                break;
2735        case MPA_REQ_RCVD:
2736
2737                /*
2738                 * We're going to mark this endpoint DEAD, but keep
2739                 * the reference on it until the ULP accepts or
2740                 * rejects the CR. Also wake up anyone waiting
2741                 * in rdma connection migration (see c4iw_accept_cr()).
2742                 */
2743                __state_set(&ep->com, CLOSING);
2744                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2745                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2746                break;
2747        case MPA_REP_SENT:
2748                __state_set(&ep->com, CLOSING);
2749                pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2750                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2751                break;
2752        case FPDU_MODE:
2753                start_ep_timer(ep);
2754                __state_set(&ep->com, CLOSING);
2755                attrs.next_state = C4IW_QP_STATE_CLOSING;
2756                ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2757                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2758                if (ret != -ECONNRESET) {
2759                        peer_close_upcall(ep);
2760                        disconnect = 1;
2761                }
2762                break;
2763        case ABORTING:
2764                disconnect = 0;
2765                break;
2766        case CLOSING:
2767                __state_set(&ep->com, MORIBUND);
2768                disconnect = 0;
2769                break;
2770        case MORIBUND:
2771                (void)stop_ep_timer(ep);
2772                if (ep->com.cm_id && ep->com.qp) {
2773                        attrs.next_state = C4IW_QP_STATE_IDLE;
2774                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2775                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2776                }
2777                close_complete_upcall(ep, 0);
2778                __state_set(&ep->com, DEAD);
2779                release = 1;
2780                disconnect = 0;
2781                break;
2782        case DEAD:
2783                disconnect = 0;
2784                break;
2785        default:
2786                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2787        }
2788        mutex_unlock(&ep->com.mutex);
2789        if (disconnect)
2790                c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2791        if (release)
2792                release_ep_resources(ep);
2793        c4iw_put_ep(&ep->com);
2794        return 0;
2795}
2796
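/*
 * Finish peer-abort processing that was deferred while the TCB was
 * being read: flush any cached SRQ buffers, move the QP to ERROR,
 * issue the abort upcall and release the ep resources along with the
 * extra reference taken in peer_abort().
 */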
2797static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2798{
2799        complete_cached_srq_buffers(ep, ep->srqe_idx);
2800        if (ep->com.cm_id && ep->com.qp) {
2801                struct c4iw_qp_attributes attrs;
2802
2803                attrs.next_state = C4IW_QP_STATE_ERROR;
2804                c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2805                               C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2806        }
2807        peer_abort_upcall(ep);
2808        release_ep_resources(ep);
2809        c4iw_put_ep(&ep->com);
2810}
2811
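/*
 * Process CPL_ABORT_REQ_RSS: the connection was reset by the peer or
 * the hardware.  Negative advice is ignored; otherwise wake up any
 * waiters, drive the state machine (deferring cleanup behind a TCB
 * read for SRQ-backed QPs), move the QP to ERROR, send the abort
 * reply and either release the ep or retry the connection with MPA v1.
 */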
2812static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2813{
2814        struct cpl_abort_req_rss6 *req = cplhdr(skb);
2815        struct c4iw_ep *ep;
2816        struct sk_buff *rpl_skb;
2817        struct c4iw_qp_attributes attrs;
2818        int ret;
2819        int release = 0;
2820        unsigned int tid = GET_TID(req);
2821        u8 status;
2822        u32 srqidx;
2823
2824        u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2825
2826        ep = get_ep_from_tid(dev, tid);
2827        if (!ep)
2828                return 0;
2829
2830        status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2831
2832        if (cxgb_is_neg_adv(status)) {
2833                pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
2834                         ep->hwtid, status, neg_adv_str(status));
2835                ep->stats.abort_neg_adv++;
2836                mutex_lock(&dev->rdev.stats.lock);
2837                dev->rdev.stats.neg_adv++;
2838                mutex_unlock(&dev->rdev.stats.lock);
2839                goto deref_ep;
2840        }
2841
2842        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2843                 ep->com.state);
2844        set_bit(PEER_ABORT, &ep->com.history);
2845
2846        /*
2847         * Wake up any threads in rdma_init() or rdma_fini().
2848         * However, this is not needed if the com state is just
2849         * MPA_REQ_SENT.
2850         */
2851        if (ep->com.state != MPA_REQ_SENT)
2852                c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2853
2854        mutex_lock(&ep->com.mutex);
2855        switch (ep->com.state) {
2856        case CONNECTING:
2857                c4iw_put_ep(&ep->parent_ep->com);
2858                break;
2859        case MPA_REQ_WAIT:
2860                (void)stop_ep_timer(ep);
2861                break;
2862        case MPA_REQ_SENT:
2863                (void)stop_ep_timer(ep);
2864                if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2865                    (mpa_rev == 2 && ep->tried_with_mpa_v1))
2866                        connect_reply_upcall(ep, -ECONNRESET);
2867                else {
2868                        /*
2869                         * we just don't send notification upwards because we
2870                         * want to retry with mpa_v1 without upper layers even
2871                         * knowing it.
2872                         *
2873                         * do some housekeeping so as to re-initiate the
2874                         * connection
2875                         */
2876                        pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2877                                __func__, mpa_rev);
2878                        ep->retry_with_mpa_v1 = 1;
2879                }
2880                break;
2881        case MPA_REP_SENT:
2882                break;
2883        case MPA_REQ_RCVD:
2884                break;
2885        case MORIBUND:
2886        case CLOSING:
2887                stop_ep_timer(ep);
2888                fallthrough;
2889        case FPDU_MODE:
2890                if (ep->com.qp && ep->com.qp->srq) {
2891                        srqidx = ABORT_RSS_SRQIDX_G(
2892                                        be32_to_cpu(req->srqidx_status));
2893                        if (srqidx) {
2894                                complete_cached_srq_buffers(ep, srqidx);
2895                        } else {
2896                                /* Hold ep ref until finish_peer_abort() */
2897                                c4iw_get_ep(&ep->com);
2898                                __state_set(&ep->com, ABORTING);
2899                                set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2900                                read_tcb(ep);
2901                                break;
2902
2903                        }
2904                }
2905
2906                if (ep->com.cm_id && ep->com.qp) {
2907                        attrs.next_state = C4IW_QP_STATE_ERROR;
2908                        ret = c4iw_modify_qp(ep->com.qp->rhp,
2909                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2910                                     &attrs, 1);
2911                        if (ret)
2912                                pr_err("%s - qp <- error failed!\n", __func__);
2913                }
2914                peer_abort_upcall(ep);
2915                break;
2916        case ABORTING:
2917                break;
2918        case DEAD:
2919                pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2920                mutex_unlock(&ep->com.mutex);
2921                goto deref_ep;
2922        default:
2923                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2924                break;
2925        }
2926        dst_confirm(ep->dst);
2927        if (ep->com.state != ABORTING) {
2928                __state_set(&ep->com, DEAD);
2929                /* we don't release if we want to retry with mpa_v1 */
2930                if (!ep->retry_with_mpa_v1)
2931                        release = 1;
2932        }
2933        mutex_unlock(&ep->com.mutex);
2934
2935        rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2936        if (WARN_ON(!rpl_skb)) {
2937                release = 1;
2938                goto out;
2939        }
2940
2941        cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2942
2943        c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2944out:
2945        if (release)
2946                release_ep_resources(ep);
2947        else if (ep->retry_with_mpa_v1) {
2948                if (ep->com.remote_addr.ss_family == AF_INET6) {
2949                        struct sockaddr_in6 *sin6 =
2950                                        (struct sockaddr_in6 *)
2951                                        &ep->com.local_addr;
2952                        cxgb4_clip_release(
2953                                        ep->com.dev->rdev.lldi.ports[0],
2954                                        (const u32 *)&sin6->sin6_addr.s6_addr,
2955                                        1);
2956                }
2957                xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2958                cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2959                                 ep->com.local_addr.ss_family);
2960                dst_release(ep->dst);
2961                cxgb4_l2t_release(ep->l2t);
2962                c4iw_reconnect(ep);
2963        }
2964
2965deref_ep:
2966        c4iw_put_ep(&ep->com);
2967        /* Dereferencing ep, referenced in peer_abort_intr() */
2968        c4iw_put_ep(&ep->com);
2969        return 0;
2970}
2971
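/*
 * Process CPL_CLOSE_CON_RPL: our half-close has completed.  CLOSING
 * moves to MORIBUND; MORIBUND stops the timer, moves the QP to IDLE,
 * issues the close-complete upcall and releases the ep.
 */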
2972static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2973{
2974        struct c4iw_ep *ep;
2975        struct c4iw_qp_attributes attrs;
2976        struct cpl_close_con_rpl *rpl = cplhdr(skb);
2977        int release = 0;
2978        unsigned int tid = GET_TID(rpl);
2979
2980        ep = get_ep_from_tid(dev, tid);
2981        if (!ep)
2982                return 0;
2983
2984        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2985
2986        /* The cm_id may be null if we failed to connect */
2987        mutex_lock(&ep->com.mutex);
2988        set_bit(CLOSE_CON_RPL, &ep->com.history);
2989        switch (ep->com.state) {
2990        case CLOSING:
2991                __state_set(&ep->com, MORIBUND);
2992                break;
2993        case MORIBUND:
2994                (void)stop_ep_timer(ep);
2995                if ((ep->com.cm_id) && (ep->com.qp)) {
2996                        attrs.next_state = C4IW_QP_STATE_IDLE;
2997                        c4iw_modify_qp(ep->com.qp->rhp,
2998                                             ep->com.qp,
2999                                             C4IW_QP_ATTR_NEXT_STATE,
3000                                             &attrs, 1);
3001                }
3002                close_complete_upcall(ep, 0);
3003                __state_set(&ep->com, DEAD);
3004                release = 1;
3005                break;
3006        case ABORTING:
3007        case DEAD:
3008                break;
3009        default:
3010                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3011                break;
3012        }
3013        mutex_unlock(&ep->com.mutex);
3014        if (release)
3015                release_ep_resources(ep);
3016        c4iw_put_ep(&ep->com);
3017        return 0;
3018}
3019
3020static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3021{
3022        struct cpl_rdma_terminate *rpl = cplhdr(skb);
3023        unsigned int tid = GET_TID(rpl);
3024        struct c4iw_ep *ep;
3025        struct c4iw_qp_attributes attrs;
3026
3027        ep = get_ep_from_tid(dev, tid);
3028
3029        if (ep) {
3030                if (ep->com.qp) {
3031                        pr_warn("TERM received tid %u qpid %u\n", tid,
3032                                ep->com.qp->wq.sq.qid);
3033                        attrs.next_state = C4IW_QP_STATE_TERMINATE;
3034                        c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3035                                       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3036                }
3037
3038                /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
3039                 * when entering the TERM state the RNIC MUST initiate a CLOSE.
3040                 */
3041                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3042                c4iw_put_ep(&ep->com);
3043        } else
3044                pr_warn("TERM received tid %u no ep/qp\n", tid);
3045
3046        return 0;
3047}
3048
3049/*
3050 * Upcall from the adapter indicating data has been transmitted.
3051 * For us it's just the single MPA request or reply.  We can now free
3052 * the skb holding the mpa message.
3053 */
3054static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3055{
3056        struct c4iw_ep *ep;
3057        struct cpl_fw4_ack *hdr = cplhdr(skb);
3058        u8 credits = hdr->credits;
3059        unsigned int tid = GET_TID(hdr);
3060
3061
3062        ep = get_ep_from_tid(dev, tid);
3063        if (!ep)
3064                return 0;
3065        pr_debug("ep %p tid %u credits %u\n",
3066                 ep, ep->hwtid, credits);
3067        if (credits == 0) {
3068                pr_debug("0 credit ack ep %p tid %u state %u\n",
3069                         ep, ep->hwtid, state_read(&ep->com));
3070                goto out;
3071        }
3072
3073        dst_confirm(ep->dst);
3074        if (ep->mpa_skb) {
3075                pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3076                         ep, ep->hwtid, state_read(&ep->com),
3077                         ep->mpa_attr.initiator ? 1 : 0);
3078                mutex_lock(&ep->com.mutex);
3079                kfree_skb(ep->mpa_skb);
3080                ep->mpa_skb = NULL;
3081                if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3082                        stop_ep_timer(ep);
3083                mutex_unlock(&ep->com.mutex);
3084        }
3085out:
3086        c4iw_put_ep(&ep->com);
3087        return 0;
3088}
3089
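/*
 * iw_cm reject handler: the ULP is rejecting an inbound MPA connect
 * request.  Send an MPA reject (or simply abort for mpa_rev 0) and
 * tear the connection down.
 */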
3090int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3091{
3092        int abort;
3093        struct c4iw_ep *ep = to_ep(cm_id);
3094
3095        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3096
3097        mutex_lock(&ep->com.mutex);
3098        if (ep->com.state != MPA_REQ_RCVD) {
3099                mutex_unlock(&ep->com.mutex);
3100                c4iw_put_ep(&ep->com);
3101                return -ECONNRESET;
3102        }
3103        set_bit(ULP_REJECT, &ep->com.history);
3104        if (mpa_rev == 0)
3105                abort = 1;
3106        else
3107                abort = send_mpa_reject(ep, pdata, pdata_len);
3108        mutex_unlock(&ep->com.mutex);
3109
3110        stop_ep_timer(ep);
3111        c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3112        c4iw_put_ep(&ep->com);
3113        return 0;
3114}
3115
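/*
 * iw_cm accept handler: the ULP is accepting an inbound MPA connect
 * request.  Negotiate IRD/ORD, bind the QP to the ep, move the QP to
 * RTS, send the MPA reply and enter FPDU_MODE.
 */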
3116int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3117{
3118        int err;
3119        struct c4iw_qp_attributes attrs;
3120        enum c4iw_qp_attr_mask mask;
3121        struct c4iw_ep *ep = to_ep(cm_id);
3122        struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3123        struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3124        int abort = 0;
3125
3126        pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3127
3128        mutex_lock(&ep->com.mutex);
3129        if (ep->com.state != MPA_REQ_RCVD) {
3130                err = -ECONNRESET;
3131                goto err_out;
3132        }
3133
3134        if (!qp) {
3135                err = -EINVAL;
3136                goto err_out;
3137        }
3138
3139        set_bit(ULP_ACCEPT, &ep->com.history);
3140        if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3141            (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3142                err = -EINVAL;
3143                goto err_abort;
3144        }
3145
3146        if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3147                if (conn_param->ord > ep->ird) {
3148                        if (RELAXED_IRD_NEGOTIATION) {
3149                                conn_param->ord = ep->ird;
3150                        } else {
3151                                ep->ird = conn_param->ird;
3152                                ep->ord = conn_param->ord;
3153                                send_mpa_reject(ep, conn_param->private_data,
3154                                                conn_param->private_data_len);
3155                                err = -ENOMEM;
3156                                goto err_abort;
3157                        }
3158                }
3159                if (conn_param->ird < ep->ord) {
3160                        if (RELAXED_IRD_NEGOTIATION &&
3161                            ep->ord <= h->rdev.lldi.max_ordird_qp) {
3162                                conn_param->ird = ep->ord;
3163                        } else {
3164                                err = -ENOMEM;
3165                                goto err_abort;
3166                        }
3167                }
3168        }
3169        ep->ird = conn_param->ird;
3170        ep->ord = conn_param->ord;
3171
3172        if (ep->mpa_attr.version == 1) {
3173                if (peer2peer && ep->ird == 0)
3174                        ep->ird = 1;
3175        } else {
3176                if (peer2peer &&
3177                    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3178                    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3179                        ep->ird = 1;
3180        }
3181
3182        pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3183
3184        ep->com.cm_id = cm_id;
3185        ref_cm_id(&ep->com);
3186        ep->com.qp = qp;
3187        ref_qp(ep);
3188
3189        /* bind QP to EP and move to RTS */
3190        attrs.mpa_attr = ep->mpa_attr;
3191        attrs.max_ird = ep->ird;
3192        attrs.max_ord = ep->ord;
3193        attrs.llp_stream_handle = ep;
3194        attrs.next_state = C4IW_QP_STATE_RTS;
3195
3196        /* bind QP and TID with INIT_WR */
3197        mask = C4IW_QP_ATTR_NEXT_STATE |
3198                             C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3199                             C4IW_QP_ATTR_MPA_ATTR |
3200                             C4IW_QP_ATTR_MAX_IRD |
3201                             C4IW_QP_ATTR_MAX_ORD;
3202
3203        err = c4iw_modify_qp(ep->com.qp->rhp,
3204                             ep->com.qp, mask, &attrs, 1);
3205        if (err)
3206                goto err_deref_cm_id;
3207
3208        set_bit(STOP_MPA_TIMER, &ep->com.flags);
3209        err = send_mpa_reply(ep, conn_param->private_data,
3210                             conn_param->private_data_len);
3211        if (err)
3212                goto err_deref_cm_id;
3213
3214        __state_set(&ep->com, FPDU_MODE);
3215        established_upcall(ep);
3216        mutex_unlock(&ep->com.mutex);
3217        c4iw_put_ep(&ep->com);
3218        return 0;
3219err_deref_cm_id:
3220        deref_cm_id(&ep->com);
3221err_abort:
3222        abort = 1;
3223err_out:
3224        mutex_unlock(&ep->com.mutex);
3225        if (abort)
3226                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3227        c4iw_put_ep(&ep->com);
3228        return err;
3229}
3230
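/*
 * For loopback connects to INADDR_ANY, pick the first primary IPv4
 * address configured on port 0 and use it as both the local and the
 * remote address.
 */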
3231static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3232{
3233        struct in_device *ind;
3234        int found = 0;
3235        struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3236        struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3237        const struct in_ifaddr *ifa;
3238
3239        ind = in_dev_get(dev->rdev.lldi.ports[0]);
3240        if (!ind)
3241                return -EADDRNOTAVAIL;
3242        rcu_read_lock();
3243        in_dev_for_each_ifa_rcu(ifa, ind) {
3244                if (ifa->ifa_flags & IFA_F_SECONDARY)
3245                        continue;
3246                laddr->sin_addr.s_addr = ifa->ifa_address;
3247                raddr->sin_addr.s_addr = ifa->ifa_address;
3248                found = 1;
3249                break;
3250        }
3251        rcu_read_unlock();
3252
3253        in_dev_put(ind);
3254        return found ? 0 : -EADDRNOTAVAIL;
3255}
3256
3257static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3258                      unsigned char banned_flags)
3259{
3260        struct inet6_dev *idev;
3261        int err = -EADDRNOTAVAIL;
3262
3263        rcu_read_lock();
3264        idev = __in6_dev_get(dev);
3265        if (idev != NULL) {
3266                struct inet6_ifaddr *ifp;
3267
3268                read_lock_bh(&idev->lock);
3269                list_for_each_entry(ifp, &idev->addr_list, if_list) {
3270                        if (ifp->scope == IFA_LINK &&
3271                            !(ifp->flags & banned_flags)) {
3272                                memcpy(addr, &ifp->addr, 16);
3273                                err = 0;
3274                                break;
3275                        }
3276                }
3277                read_unlock_bh(&idev->lock);
3278        }
3279        rcu_read_unlock();
3280        return err;
3281}
3282
3283static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3284{
3285        struct in6_addr addr;
3286        struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3287        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3288
3289        if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3290                memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3291                memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3292                return 0;
3293        }
3294        return -EADDRNOTAVAIL;
3295}
3296
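/*
 * iw_cm connect handler: allocate an ep and an active-open TID (atid),
 * resolve the route and L2T entry to the peer, then send the active
 * open request to the hardware.
 */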
3297int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3298{
3299        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3300        struct c4iw_ep *ep;
3301        int err = 0;
3302        struct sockaddr_in *laddr;
3303        struct sockaddr_in *raddr;
3304        struct sockaddr_in6 *laddr6;
3305        struct sockaddr_in6 *raddr6;
3306        __u8 *ra;
3307        int iptype;
3308
3309        if ((conn_param->ord > cur_max_read_depth(dev)) ||
3310            (conn_param->ird > cur_max_read_depth(dev))) {
3311                err = -EINVAL;
3312                goto out;
3313        }
3314        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3315        if (!ep) {
3316                pr_err("%s - cannot alloc ep\n", __func__);
3317                err = -ENOMEM;
3318                goto out;
3319        }
3320
3321        skb_queue_head_init(&ep->com.ep_skb_list);
3322        if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3323                err = -ENOMEM;
3324                goto fail1;
3325        }
3326
3327        timer_setup(&ep->timer, ep_timeout, 0);
3328        ep->plen = conn_param->private_data_len;
3329        if (ep->plen)
3330                memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3331                       conn_param->private_data, ep->plen);
3332        ep->ird = conn_param->ird;
3333        ep->ord = conn_param->ord;
3334
3335        if (peer2peer && ep->ord == 0)
3336                ep->ord = 1;
3337
3338        ep->com.cm_id = cm_id;
3339        ref_cm_id(&ep->com);
3340        cm_id->provider_data = ep;
3341        ep->com.dev = dev;
3342        ep->com.qp = get_qhp(dev, conn_param->qpn);
3343        if (!ep->com.qp) {
3344                pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3345                err = -EINVAL;
3346                goto fail2;
3347        }
3348        ref_qp(ep);
3349        pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3350                 ep->com.qp, cm_id);
3351
3352        /*
3353         * Allocate an active TID to initiate a TCP connection.
3354         */
3355        ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3356        if (ep->atid == -1) {
3357                pr_err("%s - cannot alloc atid\n", __func__);
3358                err = -ENOMEM;
3359                goto fail2;
3360        }
3361        err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3362        if (err)
3363                goto fail5;
3364
3365        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3366               sizeof(ep->com.local_addr));
3367        memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3368               sizeof(ep->com.remote_addr));
3369
3370        laddr = (struct sockaddr_in *)&ep->com.local_addr;
3371        raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3372        laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3373        raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3374
3375        if (cm_id->m_remote_addr.ss_family == AF_INET) {
3376                iptype = 4;
3377                ra = (__u8 *)&raddr->sin_addr;
3378
3379                /*
3380                 * Handle loopback requests to INADDR_ANY.
3381                 */
3382                if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3383                        err = pick_local_ipaddrs(dev, cm_id);
3384                        if (err)
3385                                goto fail3;
3386                }
3387
3388                /* find a route */
3389                pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3390                         &laddr->sin_addr, ntohs(laddr->sin_port),
3391                         ra, ntohs(raddr->sin_port));
3392                ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3393                                          laddr->sin_addr.s_addr,
3394                                          raddr->sin_addr.s_addr,
3395                                          laddr->sin_port,
3396                                          raddr->sin_port, cm_id->tos);
3397        } else {
3398                iptype = 6;
3399                ra = (__u8 *)&raddr6->sin6_addr;
3400
3401                /*
3402                 * Handle loopback requests to the IPv6 unspecified address (::).
3403                 */
3404                if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3405                        err = pick_local_ip6addrs(dev, cm_id);
3406                        if (err)
3407                                goto fail3;
3408                }
3409
3410                /* find a route */
3411                pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3412                         laddr6->sin6_addr.s6_addr,
3413                         ntohs(laddr6->sin6_port),
3414                         raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3415                ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3416                                           laddr6->sin6_addr.s6_addr,
3417                                           raddr6->sin6_addr.s6_addr,
3418                                           laddr6->sin6_port,
3419                                           raddr6->sin6_port, cm_id->tos,
3420                                           raddr6->sin6_scope_id);
3421        }
3422        if (!ep->dst) {
3423                pr_err("%s - cannot find route\n", __func__);
3424                err = -EHOSTUNREACH;
3425                goto fail3;
3426        }
3427
3428        err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3429                        ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3430        if (err) {
3431                pr_err("%s - cannot alloc l2e\n", __func__);
3432                goto fail4;
3433        }
3434
3435        pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3436                 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3437                 ep->l2t->idx);
3438
3439        state_set(&ep->com, CONNECTING);
3440        ep->tos = cm_id->tos;
3441
3442        /* send connect request to rnic */
3443        err = send_connect(ep);
3444        if (!err)
3445                goto out;
3446
3447        cxgb4_l2t_release(ep->l2t);
3448fail4:
3449        dst_release(ep->dst);
3450fail3:
3451        xa_erase_irq(&ep->com.dev->atids, ep->atid);
3452fail5:
3453        cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3454fail2:
3455        skb_queue_purge(&ep->com.ep_skb_list);
3456        deref_cm_id(&ep->com);
3457fail1:
3458        c4iw_put_ep(&ep->com);
3459out:
3460        return err;
3461}
3462
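/*
 * Program an IPv6 hardware listening server: install a CLIP entry for
 * the local address (unless listening on the unspecified address),
 * create the server entry and wait for the firmware reply.
 */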
3463static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3464{
3465        int err;
3466        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3467                                    &ep->com.local_addr;
3468
3469        if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3470                err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3471                                     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3472                if (err)
3473                        return err;
3474        }
3475        c4iw_init_wr_wait(ep->com.wr_waitp);
3476        err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3477                                   ep->stid, &sin6->sin6_addr,
3478                                   sin6->sin6_port,
3479                                   ep->com.dev->rdev.lldi.rxq_ids[0]);
3480        if (!err)
3481                err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3482                                          ep->com.wr_waitp,
3483                                          0, 0, __func__);
3484        else if (err > 0)
3485                err = net_xmit_errno(err);
3486        if (err) {
3487                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3488                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3489                pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3490                       err, ep->stid,
3491                       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3492        }
3493        return err;
3494}
3495
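/*
 * Program an IPv4 hardware listening server: either install a server
 * filter (retrying while the hardware is busy) when firmware offload
 * connections are enabled, or create a regular server entry and wait
 * for the firmware reply.
 */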
3496static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3497{
3498        int err;
3499        struct sockaddr_in *sin = (struct sockaddr_in *)
3500                                  &ep->com.local_addr;
3501
3502        if (dev->rdev.lldi.enable_fw_ofld_conn) {
3503                do {
3504                        err = cxgb4_create_server_filter(
3505                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3506                                sin->sin_addr.s_addr, sin->sin_port, 0,
3507                                ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3508                        if (err == -EBUSY) {
3509                                if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3510                                        err = -EIO;
3511                                        break;
3512                                }
3513                                set_current_state(TASK_UNINTERRUPTIBLE);
3514                                schedule_timeout(usecs_to_jiffies(100));
3515                        }
3516                } while (err == -EBUSY);
3517        } else {
3518                c4iw_init_wr_wait(ep->com.wr_waitp);
3519                err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3520                                ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3521                                0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3522                if (!err)
3523                        err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3524                                                  ep->com.wr_waitp,
3525                                                  0, 0, __func__);
3526                else if (err > 0)
3527                        err = net_xmit_errno(err);
3528        }
3529        if (err)
3530                pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n",
3531                       err, ep->stid,
3532                       &sin->sin_addr, ntohs(sin->sin_port));
3533        return err;
3534}
3535
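/*
 * iw_cm listen handler: allocate a listening endpoint and a server TID
 * (stid), then program the hardware listening server for the local
 * address family.
 */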
3536int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3537{
3538        int err = 0;
3539        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3540        struct c4iw_listen_ep *ep;
3541
3542        might_sleep();
3543
3544        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3545        if (!ep) {
3546                pr_err("%s - cannot alloc ep\n", __func__);
3547                err = -ENOMEM;
3548                goto fail1;
3549        }
3550        skb_queue_head_init(&ep->com.ep_skb_list);
3551        pr_debug("ep %p\n", ep);
3552        ep->com.cm_id = cm_id;
3553        ref_cm_id(&ep->com);
3554        ep->com.dev = dev;
3555        ep->backlog = backlog;
3556        memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3557               sizeof(ep->com.local_addr));
3558
3559        /*
3560         * Allocate a server TID.
3561         */
3562        if (dev->rdev.lldi.enable_fw_ofld_conn &&
3563            ep->com.local_addr.ss_family == AF_INET)
3564                ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3565                                             cm_id->m_local_addr.ss_family, ep);
3566        else
3567                ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3568                                            cm_id->m_local_addr.ss_family, ep);
3569
3570        if (ep->stid == -1) {
3571                pr_err("%s - cannot alloc stid\n", __func__);
3572                err = -ENOMEM;
3573                goto fail2;
3574        }
3575        err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3576        if (err)
3577                goto fail3;
3578
3579        state_set(&ep->com, LISTEN);
3580        if (ep->com.local_addr.ss_family == AF_INET)
3581                err = create_server4(dev, ep);
3582        else
3583                err = create_server6(dev, ep);
3584        if (!err) {
3585                cm_id->provider_data = ep;
3586                goto out;
3587        }
3588        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3589fail3:
3590        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3591                        ep->com.local_addr.ss_family);
3592fail2:
3593        deref_cm_id(&ep->com);
3594        c4iw_put_ep(&ep->com);
3595fail1:
3596out:
3597        return err;
3598}
3599
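/*
 * iw_cm listen-destroy handler: remove the hardware listening server
 * (or the server filter used for fw_ofld connections), release the
 * CLIP entry where one was installed and free the stid.
 */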
3600int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3601{
3602        int err;
3603        struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3604
3605        pr_debug("ep %p\n", ep);
3606
3607        might_sleep();
3608        state_set(&ep->com, DEAD);
3609        if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3610            ep->com.local_addr.ss_family == AF_INET) {
3611                err = cxgb4_remove_server_filter(
3612                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
3613                        ep->com.dev->rdev.lldi.rxq_ids[0], false);
3614        } else {
3615                struct sockaddr_in6 *sin6;
3616                c4iw_init_wr_wait(ep->com.wr_waitp);
3617                err = cxgb4_remove_server(
3618                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
3619                                ep->com.dev->rdev.lldi.rxq_ids[0],
3620                                ep->com.local_addr.ss_family == AF_INET6);
3621                if (err)
3622                        goto done;
3623                err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3624                                          0, 0, __func__);
3625                sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3626                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3627                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3628        }
3629        xa_erase_irq(&ep->com.dev->stids, ep->stid);
3630        cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3631                        ep->com.local_addr.ss_family);
3632done:
3633        deref_cm_id(&ep->com);
3634        c4iw_put_ep(&ep->com);
3635        return err;
3636}
3637
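/*
 * Initiate connection teardown: an abort (RST) when 'abrupt' is set,
 * otherwise an orderly half-close (FIN).  Drives the ep state machine
 * and, if sending the close/abort fails, moves the QP to ERROR and
 * releases the ep resources.
 */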
3638int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3639{
3640        int ret = 0;
3641        int close = 0;
3642        int fatal = 0;
3643        struct c4iw_rdev *rdev;
3644
3645        mutex_lock(&ep->com.mutex);
3646
3647        pr_debug("ep %p state %s, abrupt %d\n", ep,
3648                 states[ep->com.state], abrupt);
3649
3650        /*
3651         * Ref the ep here in case we have fatal errors causing the
3652         * ep to be released and freed.
3653         */
3654        c4iw_get_ep(&ep->com);
3655
3656        rdev = &ep->com.dev->rdev;
3657        if (c4iw_fatal_error(rdev)) {
3658                fatal = 1;
3659                close_complete_upcall(ep, -EIO);
3660                ep->com.state = DEAD;
3661        }
3662        switch (ep->com.state) {
3663        case MPA_REQ_WAIT:
3664        case MPA_REQ_SENT:
3665        case MPA_REQ_RCVD:
3666        case MPA_REP_SENT:
3667        case FPDU_MODE:
3668        case CONNECTING:
3669                close = 1;
3670                if (abrupt)
3671                        ep->com.state = ABORTING;
3672                else {
3673                        ep->com.state = CLOSING;
3674
3675                        /*
3676                         * if we close before we see the fw4_ack() then we fix
3677                         * up the timer state since we're reusing it.
3678                         */
3679                        if (ep->mpa_skb &&
3680                            test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3681                                clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3682                                stop_ep_timer(ep);
3683                        }
3684                        start_ep_timer(ep);
3685                }
3686                set_bit(CLOSE_SENT, &ep->com.flags);
3687                break;
3688        case CLOSING:
3689                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3690                        close = 1;
3691                        if (abrupt) {
3692                                (void)stop_ep_timer(ep);
3693                                ep->com.state = ABORTING;
3694                        } else
3695                                ep->com.state = MORIBUND;
3696                }
3697                break;
3698        case MORIBUND:
3699        case ABORTING:
3700        case DEAD:
3701                pr_debug("ignoring disconnect ep %p state %u\n",
3702                         ep, ep->com.state);
3703                break;
3704        default:
3705                WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3706                break;
3707        }
3708
3709        if (close) {
3710                if (abrupt) {
3711                        set_bit(EP_DISC_ABORT, &ep->com.history);
3712                        ret = send_abort(ep);
3713                } else {
3714                        set_bit(EP_DISC_CLOSE, &ep->com.history);
3715                        ret = send_halfclose(ep);
3716                }
3717                if (ret) {
3718                        set_bit(EP_DISC_FAIL, &ep->com.history);
3719                        if (!abrupt) {
3720                                stop_ep_timer(ep);
3721                                close_complete_upcall(ep, -EIO);
3722                        }
3723                        if (ep->com.qp) {
3724                                struct c4iw_qp_attributes attrs;
3725
3726                                attrs.next_state = C4IW_QP_STATE_ERROR;
3727                                ret = c4iw_modify_qp(ep->com.qp->rhp,
3728                                                     ep->com.qp,
3729                                                     C4IW_QP_ATTR_NEXT_STATE,
3730                                                     &attrs, 1);
3731                                if (ret)
3732                                        pr_err("%s - qp <- error failed!\n",
3733                                               __func__);
3734                        }
3735                        fatal = 1;
3736                }
3737        }
3738        mutex_unlock(&ep->com.mutex);
3739        c4iw_put_ep(&ep->com);
3740        if (fatal)
3741                release_ep_resources(ep);
3742        return ret;
3743}
3744
3745static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3746                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3747{
3748        struct c4iw_ep *ep;
3749        int atid = be32_to_cpu(req->tid);
3750
3751        ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3752                                           (__force u32) req->tid);
3753        if (!ep)
3754                return;
3755
3756        switch (req->retval) {
3757        case FW_ENOMEM:
3758                set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3759                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3760                        send_fw_act_open_req(ep, atid);
3761                        return;
3762                }
3763                fallthrough;
3764        case FW_EADDRINUSE:
3765                set_bit(ACT_RETRY_INUSE, &ep->com.history);
3766                if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3767                        send_fw_act_open_req(ep, atid);
3768                        return;
3769                }
3770                break;
3771        default:
3772                pr_info("%s unexpected ofld conn wr retval %d\n",
3773                       __func__, req->retval);
3774                break;
3775        }
3776        pr_err("active ofld_connect_wr failure %d atid %d\n",
3777               req->retval, atid);
3778        mutex_lock(&dev->rdev.stats.lock);
3779        dev->rdev.stats.act_ofld_conn_fails++;
3780        mutex_unlock(&dev->rdev.stats.lock);
3781        connect_reply_upcall(ep, status2errno(req->retval));
3782        state_set(&ep->com, DEAD);
3783        if (ep->com.remote_addr.ss_family == AF_INET6) {
3784                struct sockaddr_in6 *sin6 =
3785                        (struct sockaddr_in6 *)&ep->com.local_addr;
3786                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3787                                   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3788        }
3789        xa_erase_irq(&dev->atids, atid);
3790        cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3791        dst_release(ep->dst);
3792        cxgb4_l2t_release(ep->l2t);
3793        c4iw_put_ep(&ep->com);
3794}
3795
3796static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3797                        struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3798{
3799        struct sk_buff *rpl_skb;
3800        struct cpl_pass_accept_req *cpl;
3801        int ret;
3802
3803        rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3804        if (req->retval) {
3805                pr_err("%s passive open failure %d\n", __func__, req->retval);
3806                mutex_lock(&dev->rdev.stats.lock);
3807                dev->rdev.stats.pas_ofld_conn_fails++;
3808                mutex_unlock(&dev->rdev.stats.lock);
3809                kfree_skb(rpl_skb);
3810        } else {
3811                cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3812                OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3813                                        (__force u32) htonl(
3814                                        (__force u32) req->tid)));
3815                ret = pass_accept_req(dev, rpl_skb);
3816                if (!ret)
3817                        kfree_skb(rpl_skb);
3818        }
3819        return;
3820}
3821
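/*
 * Helpers for extracting fields from the raw TCB image returned in
 * CPL_GET_TCB_RPL: 32-bit TCB word 'word' lives in big-endian 64-bit
 * chunk (31 - word) / 2 of the reply.  For example, read_tcb_rpl()
 * below fetches the SRQ index with
 * t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M, TCB_RQ_START_S).
 */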
3822static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3823{
3824        u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3825        u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3826        u64 t;
3827        u32 shift = 32;
3828
3829        t = (thi << shift) | (tlo >> shift);
3830
3831        return t;
3832}
3833
3834static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3835{
3836        u32 v;
3837        u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3838
3839        if (word & 0x1)
3840                shift += 32;
3841        v = (t >> shift) & mask;
3842        return v;
3843}
3844
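/*
 * Process CPL_GET_TCB_RPL, the reply to the TCB read issued by
 * read_tcb().  Once TF_RX_PDU_OUT is clear, extract the SRQ index
 * (rq_start) from the TCB and finish the deferred abort processing.
 */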
3845static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3846{
3847        struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3848        __be64 *tcb = (__be64 *)(rpl + 1);
3849        unsigned int tid = GET_TID(rpl);
3850        struct c4iw_ep *ep;
3851        u64 t_flags_64;
3852        u32 rx_pdu_out;
3853
3854        ep = get_ep_from_tid(dev, tid);
3855        if (!ep)
3856                return 0;
3857        /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3858         * determine if there's an Rx PDU feedback event pending.
3859         *
3860         * If that bit is set, it means we'll need to re-read the TCB's
3861         * rq_start value. The final value is the one present in a TCB
3862         * with the TF_RX_PDU_OUT bit cleared.
3863         */
3864
3865        t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3866        rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3867
3868        c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3869        c4iw_put_ep(&ep->com); /* from read_tcb() */
3870
3871        /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3872        if (rx_pdu_out) {
3873                if (++ep->rx_pdu_out_cnt >= 2) {
3874                        WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3875                        goto cleanup;
3876                }
3877                read_tcb(ep);
3878                return 0;
3879        }
3880
3881        ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
3882                                          TCB_RQ_START_S);
3883cleanup:
3884        pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3885
3886        if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3887                finish_peer_abort(dev, ep);
3888        else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3889                send_abort_req(ep);
3890        else
3891                WARN_ONCE(1, "unexpected state!");
3892
3893        return 0;
3894}
3895
3896static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3897{
3898        struct cpl_fw6_msg *rpl = cplhdr(skb);
3899        struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3900
3901        switch (rpl->type) {
3902        case FW6_TYPE_CQE:
3903                c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3904                break;
3905        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3906                req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3907                switch (req->t_state) {
3908                case TCP_SYN_SENT:
3909                        active_ofld_conn_reply(dev, skb, req);
3910                        break;
3911                case TCP_SYN_RECV:
3912                        passive_ofld_conn_reply(dev, skb, req);
3913                        break;
3914                default:
3915                        pr_err("%s unexpected ofld conn wr state %d\n",
3916                               __func__, req->t_state);
3917                        break;
3918                }
3919                break;
3920        }
3921        return 0;
3922}
3923
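/*
 * Rewrite the received cpl_rx_pkt (which carries the SYN) into a
 * cpl_pass_accept_req in place, parsing the TCP options from the SYN,
 * so it can later be fed through the regular pass_accept_req() path.
 */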
3924static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3925{
3926        __be32 l2info;
3927        __be16 hdr_len, vlantag, len;
3928        u16 eth_hdr_len;
3929        int tcp_hdr_len, ip_hdr_len;
3930        u8 intf;
3931        struct cpl_rx_pkt *cpl = cplhdr(skb);
3932        struct cpl_pass_accept_req *req;
3933        struct tcp_options_received tmp_opt;
3934        struct c4iw_dev *dev;
3935        enum chip_type type;
3936
3937        dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3938        /* Store values from cpl_rx_pkt in temporary location. */
3939        vlantag = cpl->vlan;
3940        len = cpl->len;
3941        l2info  = cpl->l2info;
3942        hdr_len = cpl->hdr_len;
3943        intf = cpl->iff;
3944
3945        __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3946
3947        /*
3948         * We need to parse the TCP options from the SYN packet
3949         * to generate the cpl_pass_accept_req.
3950         */
3951        memset(&tmp_opt, 0, sizeof(tmp_opt));
3952        tcp_clear_options(&tmp_opt);
3953        tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3954
3955        req = __skb_push(skb, sizeof(*req));
3956        memset(req, 0, sizeof(*req));
3957        req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3958                         SYN_MAC_IDX_V(RX_MACIDX_G(
3959                         be32_to_cpu(l2info))) |
3960                         SYN_XACT_MATCH_F);
3961        type = dev->rdev.lldi.adapter_type;
3962        tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3963        ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3964        req->hdr_len =
3965                cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3966        if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3967                eth_hdr_len = is_t4(type) ?
3968                                RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3969                                RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3970                req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3971                                            IP_HDR_LEN_V(ip_hdr_len) |
3972                                            ETH_HDR_LEN_V(eth_hdr_len));
3973        } else { /* T6 and later */
3974                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3975                req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3976                                            T6_IP_HDR_LEN_V(ip_hdr_len) |
3977                                            T6_ETH_HDR_LEN_V(eth_hdr_len));
3978        }
3979        req->vlan = vlantag;
3980        req->len = len;
3981        req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3982                                    PASS_OPEN_TOS_V(tos));
3983        req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3984        if (tmp_opt.wscale_ok)
3985                req->tcpopt.wsf = tmp_opt.snd_wscale;
3986        req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3987        if (tmp_opt.sack_ok)
3988                req->tcpopt.sack = 1;
3989        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3990        return;
3991}
3992
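/*
 * Build and send a FW_OFLD_CONNECTION_WR asking the firmware to set up
 * the passive-open connection.  The skb carrying the synthesized
 * cpl_pass_accept_req is stashed in the cookie so that the deferred
 * reply handler can complete the accept.
 */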
3993static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3994                                  __be32 laddr, __be16 lport,
3995                                  __be32 raddr, __be16 rport,
3996                                  u32 rcv_isn, u32 filter, u16 window,
3997                                  u32 rss_qid, u8 port_id)
3998{
3999        struct sk_buff *req_skb;
4000        struct fw_ofld_connection_wr *req;
4001        struct cpl_pass_accept_req *cpl = cplhdr(skb);
4002        int ret;
4003
4004        req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
4005        if (!req_skb)
4006                return;
4007        req = __skb_put_zero(req_skb, sizeof(*req));
4008        req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4009        req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4010        req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4011        req->le.filter = (__force __be32) filter;
4012        req->le.lport = lport;
4013        req->le.pport = rport;
4014        req->le.u.ipv4.lip = laddr;
4015        req->le.u.ipv4.pip = raddr;
4016        req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4017        req->tcb.rcv_adv = htons(window);
4018        req->tcb.t_state_to_astid =
4019                 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4020                        FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4021                        FW_OFLD_CONNECTION_WR_ASTID_V(
4022                        PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4023
4024        /*
4025         * We store the qid in opt2 which will be used by the firmware
4026         * to send us the wr response.
4027         */
4028        req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4029
4030        /*
4031         * We initialize the MSS index in the TCB to 0xF so that
4032         * when the driver sends cpl_pass_accept_rpl the TCB picks
4033         * up the correct value. If this were 0, TP would ignore
4034         * any value > 0 for the MSS index.
4035         */
4036        req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4037        req->cookie = (uintptr_t)skb;
4038
4039        set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4040        ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4041        if (ret < 0) {
4042                pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4043                       ret);
4044                kfree_skb(skb);
4045                kfree_skb(req_skb);
4046        }
4047}
4048
4049/*
4050 * Handler for CPL_RX_PKT messages. These arrive when a filter,
4051 * rather than a server entry, is used to redirect a SYN packet.
4052 * When a packet hits the filter it is redirected to the offload
4053 * queue and the driver tries to establish the connection using a
4054 * firmware work request.
4055 */
4056static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4057{
4058        int stid;
4059        unsigned int filter;
4060        struct ethhdr *eh = NULL;
4061        struct vlan_ethhdr *vlan_eh = NULL;
4062        struct iphdr *iph;
4063        struct tcphdr *tcph;
4064        struct rss_header *rss = (void *)skb->data;
4065        struct cpl_rx_pkt *cpl = (void *)skb->data;
4066        struct cpl_pass_accept_req *req = (void *)(rss + 1);
4067        struct l2t_entry *e;
4068        struct dst_entry *dst;
4069        struct c4iw_ep *lep = NULL;
4070        u16 window;
4071        struct port_info *pi;
4072        struct net_device *pdev;
4073        u16 rss_qid, eth_hdr_len;
4074        int step;
4075        struct neighbour *neigh;
4076
4077        /* Drop all non-SYN packets */
4078        if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4079                goto reject;
4080
4081        /*
4082         * Drop all packets which did not hit the filter.
4083         * Unlikely to happen.
4084         */
4085        if (!(rss->filter_hit && rss->filter_tid))
4086                goto reject;
4087
4088        /*
4089         * Calculate the server TID from the filter hit index in cpl_rx_pkt.
4090         */
4091        stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4092
4093        lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4094        if (!lep) {
4095                pr_warn("%s connect request on invalid stid %d\n",
4096                        __func__, stid);
4097                goto reject;
4098        }
4099
4100        switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4101        case CHELSIO_T4:
4102                eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4103                break;
4104        case CHELSIO_T5:
4105                eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4106                break;
4107        case CHELSIO_T6:
4108                eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4109                break;
4110        default:
4111                pr_err("T%d Chip is not supported\n",
4112                       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4113                goto reject;
4114        }
4115
4116        if (eth_hdr_len == ETH_HLEN) {
4117                eh = (struct ethhdr *)(req + 1);
4118                iph = (struct iphdr *)(eh + 1);
4119        } else {
4120                vlan_eh = (struct vlan_ethhdr *)(req + 1);
4121                iph = (struct iphdr *)(vlan_eh + 1);
4122                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4123        }
4124
4125        if (iph->version != 0x4)
4126                goto reject;
4127
4128        tcph = (struct tcphdr *)(iph + 1);
4129        skb_set_network_header(skb, (void *)iph - (void *)rss);
4130        skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4131        skb_get(skb);
4132
4133        pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4134                 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4135                 ntohs(tcph->source), iph->tos);
4136
4137        dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4138                              iph->daddr, iph->saddr, tcph->dest,
4139                              tcph->source, iph->tos);
4140        if (!dst) {
4141                pr_err("%s - failed to find dst entry!\n", __func__);
4142                goto reject;
4143        }
4144        neigh = dst_neigh_lookup_skb(dst, skb);
4145
4146        if (!neigh) {
4147                pr_err("%s - failed to allocate neigh!\n", __func__);
4148                goto free_dst;
4149        }
4150
4151        if (neigh->dev->flags & IFF_LOOPBACK) {
4152                pdev = ip_dev_find(&init_net, iph->daddr);
4153                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4154                                    pdev, 0);
4155                pi = (struct port_info *)netdev_priv(pdev);
4156                dev_put(pdev);
4157        } else {
4158                pdev = get_real_dev(neigh->dev);
4159                e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4160                                        pdev, 0);
4161                pi = (struct port_info *)netdev_priv(pdev);
4162        }
4163        neigh_release(neigh);
4164        if (!e) {
4165                pr_err("%s - failed to allocate l2t entry!\n",
4166                       __func__);
4167                goto free_dst;
4168        }
4169
4170        step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4171        rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4172        window = (__force u16) htons((__force u16)tcph->window);
4173
4174        /* Calculate the filter portion for the LE region. */
4175        filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4176                                                    dev->rdev.lldi.ports[0],
4177                                                    e));
4178
4179        /*
4180         * Synthesize the cpl_pass_accept_req. We have everything except the
4181         * TID. Once firmware sends a reply with TID we update the TID field
4182         * in cpl and pass it through the regular cpl_pass_accept_req path.
4183         */
4184        build_cpl_pass_accept_req(skb, stid, iph->tos);
4185        send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4186                              tcph->source, ntohl(tcph->seq), filter, window,
4187                              rss_qid, pi->port_id);
4188        cxgb4_l2t_release(e);
4189free_dst:
4190        dst_release(dst);
4191reject:
4192        if (lep)
4193                c4iw_put_ep(&lep->com);
4194        return 0;
4195}
4196
4197/*
4198 * These are the real handlers that are called from a
4199 * work queue.
4200 */
4201static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4202        [CPL_ACT_ESTABLISH] = act_establish,
4203        [CPL_ACT_OPEN_RPL] = act_open_rpl,
4204        [CPL_RX_DATA] = rx_data,
4205        [CPL_ABORT_RPL_RSS] = abort_rpl,
4206        [CPL_ABORT_RPL] = abort_rpl,
4207        [CPL_PASS_OPEN_RPL] = pass_open_rpl,
4208        [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4209        [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4210        [CPL_PASS_ESTABLISH] = pass_establish,
4211        [CPL_PEER_CLOSE] = peer_close,
4212        [CPL_ABORT_REQ_RSS] = peer_abort,
4213        [CPL_CLOSE_CON_RPL] = close_con_rpl,
4214        [CPL_RDMA_TERMINATE] = terminate,
4215        [CPL_FW4_ACK] = fw4_ack,
4216        [CPL_GET_TCB_RPL] = read_tcb_rpl,
4217        [CPL_FW6_MSG] = deferred_fw6_msg,
4218        [CPL_RX_PKT] = rx_pkt,
4219        [FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4220        [FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4221};
4222
4223static void process_timeout(struct c4iw_ep *ep)
4224{
4225        struct c4iw_qp_attributes attrs;
4226        int abort = 1;
4227
4228        mutex_lock(&ep->com.mutex);
4229        pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4230        set_bit(TIMEDOUT, &ep->com.history);
4231        switch (ep->com.state) {
4232        case MPA_REQ_SENT:
4233                connect_reply_upcall(ep, -ETIMEDOUT);
4234                break;
4235        case MPA_REQ_WAIT:
4236        case MPA_REQ_RCVD:
4237        case MPA_REP_SENT:
4238        case FPDU_MODE:
4239                break;
4240        case CLOSING:
4241        case MORIBUND:
4242                if (ep->com.cm_id && ep->com.qp) {
4243                        attrs.next_state = C4IW_QP_STATE_ERROR;
4244                        c4iw_modify_qp(ep->com.qp->rhp,
4245                                     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4246                                     &attrs, 1);
4247                }
4248                close_complete_upcall(ep, -ETIMEDOUT);
4249                break;
4250        case ABORTING:
4251        case DEAD:
4252
4253                /*
4254                 * These states are expected if the ep timed out at the same
4255                 * time as another thread was calling stop_ep_timer().
4256                 * So we silently do nothing for these states.
4257                 */
4258                abort = 0;
4259                break;
4260        default:
4261                WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4262                        __func__, ep, ep->hwtid, ep->com.state);
4263                abort = 0;
4264        }
4265        mutex_unlock(&ep->com.mutex);
4266        if (abort)
4267                c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4268        c4iw_put_ep(&ep->com);
4269}
4270
4271static void process_timedout_eps(void)
4272{
4273        struct c4iw_ep *ep;
4274
4275        spin_lock_irq(&timeout_lock);
4276        while (!list_empty(&timeout_list)) {
4277                struct list_head *tmp;
4278
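                    /*
                     * Detach the ep and clear its list pointers so
                     * ep_timeout() sees it as unqueued, then drop the lock
                     * while running the handler: process_timeout() takes a
                     * mutex and may sleep.
                     */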
4279                tmp = timeout_list.next;
4280                list_del(tmp);
4281                tmp->next = NULL;
4282                tmp->prev = NULL;
4283                spin_unlock_irq(&timeout_lock);
4284                ep = list_entry(tmp, struct c4iw_ep, entry);
4285                process_timeout(ep);
4286                spin_lock_irq(&timeout_lock);
4287        }
4288        spin_unlock_irq(&timeout_lock);
4289}
4290
4291static void process_work(struct work_struct *work)
4292{
4293        struct sk_buff *skb = NULL;
4294        struct c4iw_dev *dev;
4295        struct cpl_act_establish *rpl;
4296        unsigned int opcode;
4297        int ret;
4298
4299        process_timedout_eps();
4300        while ((skb = skb_dequeue(&rxq))) {
4301                rpl = cplhdr(skb);
4302                dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4303                opcode = rpl->ot.opcode;
4304
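                    /*
                     * A handler that returns non-zero still owns the skb;
                     * otherwise it is freed here.
                     */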
4305                if (opcode >= ARRAY_SIZE(work_handlers) ||
4306                    !work_handlers[opcode]) {
4307                        pr_err("No handler for opcode 0x%x.\n", opcode);
4308                        kfree_skb(skb);
4309                } else {
4310                        ret = work_handlers[opcode](dev, skb);
4311                        if (!ret)
4312                                kfree_skb(skb);
4313                }
4314                process_timedout_eps();
4315        }
4316}
4317
4318static DECLARE_WORK(skb_work, process_work);
4319
4320static void ep_timeout(struct timer_list *t)
4321{
4322        struct c4iw_ep *ep = from_timer(ep, t, timer);
4323        int kickit = 0;
4324
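            /*
             * The timer fires in atomic context, so just queue the ep on
             * the timeout list and kick the work queue; the actual
             * processing in process_timeout() may sleep.
             */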
4325        spin_lock(&timeout_lock);
4326        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4327                /*
4328                 * Only insert if it is not already on the list.
4329                 */
4330                if (!ep->entry.next) {
4331                        list_add_tail(&ep->entry, &timeout_list);
4332                        kickit = 1;
4333                }
4334        }
4335        spin_unlock(&timeout_lock);
4336        if (kickit)
4337                queue_work(workq, &skb_work);
4338}
4339
4340/*
4341 * All the CM events are handled on a work queue to have a safe context.
4342 */
4343static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4344{
4346        /*
4347         * Save dev in the skb->cb area.
4348         */
4349        *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4350
4351        /*
4352         * Queue the skb and schedule the worker thread.
4353         */
4354        skb_queue_tail(&rxq, skb);
4355        queue_work(workq, &skb_work);
4356        return 0;
4357}
4358
4359static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4360{
4361        struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4362
4363        if (rpl->status != CPL_ERR_NONE) {
4364                pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4365                       rpl->status, GET_TID(rpl));
4366        }
4367        kfree_skb(skb);
4368        return 0;
4369}
4370
4371static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4372{
4373        struct cpl_fw6_msg *rpl = cplhdr(skb);
4374        struct c4iw_wr_wait *wr_waitp;
4375        int ret;
4376
4377        pr_debug("type %u\n", rpl->type);
4378
4379        switch (rpl->type) {
4380        case FW6_TYPE_WR_RPL:
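                    /*
                     * For work-request replies, byte 1 of data[0] carries
                     * the completion status and data[1] carries back the
                     * wr_wait cookie from the original request.
                     */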
4381                ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4382                wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4383                pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4384                if (wr_waitp)
4385                        c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4386                kfree_skb(skb);
4387                break;
4388        case FW6_TYPE_CQE:
4389        case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4390                sched(dev, skb);
4391                break;
4392        default:
4393                pr_err("%s unexpected fw6 msg type %u\n",
4394                       __func__, rpl->type);
4395                kfree_skb(skb);
4396                break;
4397        }
4398        return 0;
4399}
4400
4401static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4402{
4403        struct cpl_abort_req_rss *req = cplhdr(skb);
4404        struct c4iw_ep *ep;
4405        unsigned int tid = GET_TID(req);
4406
4407        ep = get_ep_from_tid(dev, tid);
4408        /* This EP will be dereferenced in peer_abort() */
4409        if (!ep) {
4410                pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4411                kfree_skb(skb);
4412                return 0;
4413        }
4414        if (cxgb_is_neg_adv(req->status)) {
4415                pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
4416                         ep->hwtid, req->status,
4417                         neg_adv_str(req->status));
4418                goto out;
4419        }
4420        pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4421
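            /*
             * Wake any thread synchronously waiting on this ep right away;
             * the abort itself is still handled by peer_abort() once
             * sched() defers the skb to the work queue.
             */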
4422        c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
4423out:
4424        sched(dev, skb);
4425        return 0;
4426}
4427
4428/*
4429 * Most upcalls from the T4 Core go to sched() to
4430 * schedule the processing on a work queue.
4431 */
4432c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4433        [CPL_ACT_ESTABLISH] = sched,
4434        [CPL_ACT_OPEN_RPL] = sched,
4435        [CPL_RX_DATA] = sched,
4436        [CPL_ABORT_RPL_RSS] = sched,
4437        [CPL_ABORT_RPL] = sched,
4438        [CPL_PASS_OPEN_RPL] = sched,
4439        [CPL_CLOSE_LISTSRV_RPL] = sched,
4440        [CPL_PASS_ACCEPT_REQ] = sched,
4441        [CPL_PASS_ESTABLISH] = sched,
4442        [CPL_PEER_CLOSE] = sched,
4443        [CPL_CLOSE_CON_RPL] = sched,
4444        [CPL_ABORT_REQ_RSS] = peer_abort_intr,
4445        [CPL_RDMA_TERMINATE] = sched,
4446        [CPL_FW4_ACK] = sched,
4447        [CPL_SET_TCB_RPL] = set_tcb_rpl,
4448        [CPL_GET_TCB_RPL] = sched,
4449        [CPL_FW6_MSG] = fw6_msg,
4450        [CPL_RX_PKT] = sched
4451};
4452
4453int __init c4iw_cm_init(void)
4454{
4455        skb_queue_head_init(&rxq);
4456
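            /*
             * A single ordered workqueue keeps CPL processing serialized;
             * WQ_MEM_RECLAIM provides a rescuer so work can still make
             * progress under memory pressure.
             */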
4457        workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4458        if (!workq)
4459                return -ENOMEM;
4460
4461        return 0;
4462}
4463
4464void c4iw_cm_term(void)
4465{
4466        WARN_ON(!list_empty(&timeout_list));
4467        flush_workqueue(workq);
4468        destroy_workqueue(workq);
4469}
4470